This commit is contained in:
Baudouin Chauviere 2019-07-09 14:35:51 -06:00
commit 4ca0967453
66 changed files with 9358 additions and 440 deletions

4
.gitignore vendored
View File

@ -32,3 +32,7 @@ vpr7_x2p/vpr/vpr
vpr7_x2p/printhandler/printhandlerdemo
vpr7_x2p/libarchfpga/read_arch
vpr7_x2p/pcre/pcredemo
# Some local temporary files
.vscode
*_local.bat

10
Dockerfile Executable file
View File

@ -0,0 +1,10 @@
# Build image for OpenFPGA: installs the toolchain, clones the repository and
# compiles it with the top-level Makefile.
FROM ubuntu:16.04
# Refresh the package index and install in the SAME layer: a separately cached
# "apt-get update" layer can otherwise be reused with a stale index when the
# package lists below change. The apt lists are removed afterwards because they
# are not needed at run time.
RUN apt-get update -qq -y \
 && apt-get -y install python3 python3-dev tcl tcl8.6-dev gawk libreadline-dev \
 && apt-get -y install autoconf automake bison build-essential cmake ctags curl doxygen flex fontconfig g++-4.9 gcc-4.9 gdb git gperf libffi-dev libcairo2-dev libevent-dev libfontconfig1-dev liblist-moreutils-perl libncurses5-dev libx11-dev libxft-dev libxml++2.6-dev perl texinfo time valgrind zip qt5-default \
 && rm -rf /var/lib/apt/lists/*
RUN git clone https://github.com/LNIS-Projects/OpenFPGA.git OpenFPGA
# NOTE(review): the project README describes a cmake-based build
# (mkdir build && cd build && cmake .. && make) - confirm that a plain
# top-level "make" is still the intended entry point.
RUN cd OpenFPGA && make

49
ERI_demo/ERI.sh Executable file
View File

@ -0,0 +1,49 @@
#!/bin/bash
# Regression test version 1.0
#
# Runs the ERI demo end to end:
#   1. instantiates ERI_demo/my_eri_demo.sh from the eri_demo.sh template,
#   2. sources it (it reads vpr_path and fpga_flow_scripts defined below),
#   3. compiles and simulates the formal-verification testbench with
#      Icarus Verilog (iverilog / vvp),
#   4. looks for the verdict in the simulation log.
#
# Expects $PWD to contain fpga_flow/, vpr7_x2p/ and ERI_demo/.
# Exit status: 0 when the log reports "Succeed", 2 when it reports "Failed",
# 1 when neither verdict is found.

# Set variables
my_pwd=$PWD
fpga_flow_scripts="${my_pwd}/fpga_flow/scripts"
vpr_path="${my_pwd}/vpr7_x2p/vpr"
benchmark="pipelined_8b_adder"
include_netlists="_include_netlists.v"
compiled_file="compiled_$benchmark"
tb_formal_postfix="_top_formal_verification_random_tb"
# The sourced demo script re-assigns verilog_output_dirname; this is only the
# initial value (with the path separator the original concatenation lacked).
verilog_output_dirname="${vpr_path}/${benchmark}_Verilog"
log_file="${benchmark}_sim.log"
new_reg_sh="${PWD}/ERI_demo/my_eri_demo.sh"
template_sh="${PWD}/ERI_demo/eri_demo.sh"

# Rewite script
cd "$fpga_flow_scripts"
perl rewrite_path_in_file.pl -i "$template_sh" -o "$new_reg_sh"
cd "$my_pwd"

# Start the script -> run the fpga generation -> run the simulation -> check the log file
source "$new_reg_sh" # Leave us in vpr folder

# Remove former log and compiled files. This is done here, in the directory
# the sourced script leaves us in, because that is where the relative
# $compiled_file and $log_file are created; removing them before the directory
# change left stale logs behind, and an old "Succeed" line could hide a failure.
rm -f -- "$log_file" "$compiled_file"

iverilog -o "$compiled_file" "$verilog_output_dirname/SRC/$benchmark$include_netlists" -s "$benchmark$tb_formal_postfix"
vvp "$compiled_file" -j 64 > "$log_file"

# Same precedence as before: "Succeed" wins, then "Failed", else no verdict.
if grep -q "Succeed" "$log_file"; then
  echo "Verification succeed"
  cd "$my_pwd"
elif grep -q "Failed" "$log_file"; then
  echo "Verification failed"
  cd "$my_pwd"
  exit 2
else
  echo "Unexpected error, Verification didn't run"
  cd "$my_pwd"
  exit 1
fi

52
ERI_demo/eri_demo.sh Normal file
View File

@ -0,0 +1,52 @@
#!/bin/bash
# Example of how to run vpr
#
# Template of the ERI demo flow. ERI.sh passes this file through
# rewrite_path_in_file.pl and sources the result; OPENFPGAPATHKEYWORD is
# presumably substituted with the real checkout path by that step.
#
# Because it is sourced, this script reads two variables it does not define
# itself: vpr_path and fpga_flow_scripts (both set by ERI.sh).

# Stop early instead of running "rm -rf" / "cd" on paths built from an empty
# variable when the script is used outside of ERI.sh.
: "${vpr_path:?vpr_path must be set by the sourcing script}"
: "${fpga_flow_scripts:?fpga_flow_scripts must be set by the sourcing script}"

# Set variables
# For FPGA-Verilog ONLY
benchmark="pipelined_8b_adder"
OpenFPGA_path="OPENFPGAPATHKEYWORD"
verilog_output_dirname="${benchmark}_Verilog"
verilog_output_dirpath="$vpr_path"
tech_file="${OpenFPGA_path}/fpga_flow/tech/PTM_45nm/45nm.xml"
# VPR critical inputs
template_arch_xml_file="${OpenFPGA_path}/fpga_flow/arch/template/k6_N10_sram_chain_HC_DPRAM_template.xml"
arch_xml_file="${OpenFPGA_path}/fpga_flow/arch/generated/k6_N10_sram_chain_HC_DPRAM.xml"
blif_file="${OpenFPGA_path}/ERI_demo/$benchmark.blif"
# (the stray trailing space inside the quotes was dropped so the value can be
# quoted safely below)
act_file="${OpenFPGA_path}/ERI_demo/$benchmark.act"
verilog_reference="${OpenFPGA_path}/ERI_demo/$benchmark.v"
vpr_route_chan_width="300"
fpga_flow_script="${OpenFPGA_path}/fpga_flow/scripts"
ff_path="$vpr_path/VerilogNetlists/ff.v"
new_ff_path="$verilog_output_dirpath/$verilog_output_dirname/SRC/ff.v"
ff_keyword="GENERATED_DIR_KEYWORD"
ff_include_path="$verilog_output_dirpath/$verilog_output_dirname"
arch_ff_keyword="FFPATHKEYWORD"
tb_formal_ext="_formal_random_top_tb.v"
formal_postfix="_top_formal_verification"
clk_unmapped="clk\[0:0\]"
clk_mapped="clk_fm"

# Remove previous designs
rm -rf -- "$verilog_output_dirpath/$verilog_output_dirname"
# -p: the directory survives between runs, a plain mkdir fails on every re-run
mkdir -p "${OpenFPGA_path}/fpga_flow/arch/generated"
# NOTE(review): the cd below is disabled, so the two perl calls run from the
# caller's current directory - confirm rewrite_path_in_file.pl is reachable
# from there.
#cd $fpga_flow_scripts
perl rewrite_path_in_file.pl -i "$template_arch_xml_file" -o "$arch_xml_file"
perl rewrite_path_in_file.pl -i "$arch_xml_file" -k "$arch_ff_keyword" "$new_ff_path"

# Move to vpr folder
cd "$vpr_path"

# Run VPR
./vpr "$arch_xml_file" "$blif_file" \
  --full_stats \
  --activity_file "$act_file" \
  --fpga_verilog \
  --fpga_verilog_dir "$verilog_output_dirpath/$verilog_output_dirname" \
  --fpga_x2p_rename_illegal_port \
  --fpga_bitstream_generator \
  --fpga_verilog_print_top_testbench \
  --fpga_verilog_print_input_blif_testbench \
  --fpga_verilog_include_timing \
  --fpga_verilog_include_signal_init \
  --fpga_verilog_print_formal_verification_top_netlist \
  --fpga_verilog_print_autocheck_top_testbench "$verilog_reference" \
  --fpga_verilog_print_user_defined_template \
  --route_chan_width "$vpr_route_chan_width" \
  --fpga_verilog_include_icarus_simulator \
  --nodisp

cd "$fpga_flow_scripts"
perl rewrite_path_in_file.pl -i "$ff_path" -o "$new_ff_path" -k "$ff_keyword" "$ff_include_path"
# Replace the generated random testbench with the copy kept in ERI_demo
rm -f -- "$verilog_output_dirpath/$verilog_output_dirname/SRC/${benchmark}${tb_formal_ext}"
perl rewrite_path_in_file.pl -i "${OpenFPGA_path}/ERI_demo/${benchmark}${tb_formal_ext}" -o "$verilog_output_dirpath/$verilog_output_dirname/SRC/${benchmark}${tb_formal_ext}"
# Back to the vpr folder (the directory entered before fpga_flow_scripts)
cd -
# Rename a line-leading clk[0:0] to clk_fm in the formal-verification top netlist
sed -i "s/^${clk_unmapped}/${clk_mapped}/" "$verilog_output_dirpath/$verilog_output_dirname/SRC/${benchmark}${formal_postfix}.v"

42
ERI_demo/my_eri_demo.sh Normal file
View File

@ -0,0 +1,42 @@
#!/bin/bash
# Example of how to run vpr
#
# NOTE(review): ERI.sh writes ERI_demo/my_eri_demo.sh from the eri_demo.sh
# template on every run, and OpenFPGA_path below is an absolute path of one
# particular machine - this file looks like a generated artifact; confirm it
# is meant to be tracked.
#
# The script reads vpr_path and fpga_flow_scripts, which it does not define
# itself; they must be set by the shell that sources it.
: "${vpr_path:?vpr_path must be set by the sourcing script}"
: "${fpga_flow_scripts:?fpga_flow_scripts must be set by the sourcing script}"

# Set variables
# For FPGA-Verilog ONLY
benchmark="pipelined_32b_adder"
OpenFPGA_path="/research/ece/lnis/USERS/alacchi/Current_release/branch_multimode/OpenFPGA"
verilog_output_dirname="${benchmark}_Verilog"
verilog_output_dirpath="$vpr_path"
tech_file="${OpenFPGA_path}/fpga_flow/tech/PTM_45nm/45nm.xml"
# VPR critical inputs
template_arch_xml_file="${OpenFPGA_path}/fpga_flow/arch/template/k6_N10_sram_chain_HC_DPRAM_template.xml"
arch_xml_file="${OpenFPGA_path}/fpga_flow/arch/generated/k6_N10_sram_chain_HC_DPRAM.xml"
blif_file="${OpenFPGA_path}/ERI_demo/$benchmark.blif"
# (the stray trailing space inside the quotes was dropped so the value can be
# quoted safely below)
act_file="${OpenFPGA_path}/ERI_demo/$benchmark.act"
verilog_reference="${OpenFPGA_path}/ERI_demo/$benchmark.v"
vpr_route_chan_width="300"
fpga_flow_script="${OpenFPGA_path}/fpga_flow/scripts"
ff_path="$vpr_path/VerilogNetlists/ff.v"
new_ff_path="$verilog_output_dirpath/$verilog_output_dirname/SRC/ff.v"
ff_keyword="GENERATED_DIR_KEYWORD"
ff_include_path="$verilog_output_dirpath/$verilog_output_dirname"
arch_ff_keyword="FFPATHKEYWORD"

# Remove previous designs
#rm -rf $verilog_output_dirpath/$verilog_output_dirname
# -p: the directory survives between runs, a plain mkdir fails on every re-run
mkdir -p "${OpenFPGA_path}/fpga_flow/arch/generated"
cd "$fpga_flow_scripts"
perl rewrite_path_in_file.pl -i "$template_arch_xml_file" -o "$arch_xml_file"
perl rewrite_path_in_file.pl -i "$arch_xml_file" -k "$arch_ff_keyword" "$new_ff_path"

# Move to vpr folder
cd "$vpr_path"

# Run VPR
# Options currently disabled:
#   --fpga_verilog_print_report_timing_tcl --power --tech_properties $tech_file --fpga_verilog_print_sdc_pnr --fpga_verilog_print_sdc_analysis --fpga_x2p_compact_routing_hierarchy
./vpr "$arch_xml_file" "$blif_file" \
  --full_stats \
  --nodisp \
  --activity_file "$act_file" \
  --fpga_verilog \
  --fpga_verilog_dir "$verilog_output_dirpath/$verilog_output_dirname" \
  --fpga_x2p_rename_illegal_port \
  --fpga_bitstream_generator \
  --fpga_verilog_print_top_testbench \
  --fpga_verilog_print_input_blif_testbench \
  --fpga_verilog_include_timing \
  --fpga_verilog_include_signal_init \
  --fpga_verilog_print_formal_verification_top_netlist \
  --fpga_verilog_print_autocheck_top_testbench "$verilog_reference" \
  --fpga_verilog_print_user_defined_template \
  --route_chan_width "$vpr_route_chan_width" \
  --fpga_verilog_include_icarus_simulator

cd "$fpga_flow_scripts"
perl rewrite_path_in_file.pl -i "$ff_path" -o "$new_ff_path" -k "$ff_keyword" "$ff_include_path"
# Back to the vpr folder (the directory entered before fpga_flow_scripts)
cd -

View File

@ -0,0 +1,95 @@
clk 0.5 0.2
wen 0.5 0.2
wen_st0 0.5 0.2
wen_st1 0.5 0.2
ren 0.5 0.2
raddr[0] 0.5 0.2
raddr[1] 0.5 0.2
raddr[2] 0.5 0.2
raddr[3] 0.5 0.2
raddr[4] 0.5 0.2
raddr[5] 0.5 0.2
waddr[0] 0.5 0.2
waddr[1] 0.5 0.2
waddr[2] 0.5 0.2
waddr[3] 0.5 0.2
waddr[4] 0.5 0.2
waddr[5] 0.5 0.2
waddr_st0[0] 0.5 0.2
waddr_st0[1] 0.5 0.2
waddr_st0[2] 0.5 0.2
waddr_st0[3] 0.5 0.2
waddr_st0[4] 0.5 0.2
waddr_st0[5] 0.5 0.2
waddr_st1[0] 0.5 0.2
waddr_st1[1] 0.5 0.2
waddr_st1[2] 0.5 0.2
waddr_st1[3] 0.5 0.2
waddr_st1[4] 0.5 0.2
waddr_st1[5] 0.5 0.2
a[0] 0.5 0.2
a[1] 0.5 0.2
a[2] 0.5 0.2
a[3] 0.5 0.2
a[4] 0.5 0.2
a[5] 0.5 0.2
a[6] 0.5 0.2
a_st0[0] 0.5 0.2
a_st0[1] 0.5 0.2
a_st0[2] 0.5 0.2
a_st0[3] 0.5 0.2
a_st0[4] 0.5 0.2
a_st0[5] 0.5 0.2
a_st0[6] 0.5 0.2
a_st1[0] 0.5 0.2
a_st1[1] 0.5 0.2
a_st1[2] 0.5 0.2
a_st1[3] 0.5 0.2
a_st1[4] 0.5 0.2
a_st1[5] 0.5 0.2
a_st1[6] 0.5 0.2
b[0] 0.5 0.2
b[1] 0.5 0.2
b[2] 0.5 0.2
b[3] 0.5 0.2
b[4] 0.5 0.2
b[5] 0.5 0.2
b[6] 0.5 0.2
b_st0[0] 0.5 0.2
b_st0[1] 0.5 0.2
b_st0[2] 0.5 0.2
b_st0[3] 0.5 0.2
b_st0[4] 0.5 0.2
b_st0[5] 0.5 0.2
b_st0[6] 0.5 0.2
b_st1[0] 0.5 0.2
b_st1[1] 0.5 0.2
b_st1[2] 0.5 0.2
b_st1[3] 0.5 0.2
b_st1[4] 0.5 0.2
b_st1[5] 0.5 0.2
b_st1[6] 0.5 0.2
q[0] 0.5 0.2
q[1] 0.5 0.2
q[2] 0.5 0.2
q[3] 0.5 0.2
q[4] 0.5 0.2
q[5] 0.5 0.2
q[6] 0.5 0.2
q[7] 0.5 0.2
AplusB[0] 0.5 0.2
AplusB[1] 0.5 0.2
AplusB[2] 0.5 0.2
AplusB[3] 0.5 0.2
AplusB[4] 0.5 0.2
AplusB[5] 0.5 0.2
AplusB[6] 0.5 0.2
AplusB[7] 0.5 0.2
cint01 0.5 0.2
cint02 0.5 0.2
cint03 0.5 0.2
cint04 0.5 0.2
cint05 0.5 0.2
cint06 0.5 0.2
cint07 0.5 0.2
zero00 0 0

View File

@ -0,0 +1,137 @@
# Benchmark pipelined_32b_adder
.model pipelined_32b_adder
.inputs clk wen ren raddr[0] raddr[1] raddr[2] raddr[3] raddr[4] raddr[5] waddr[0] waddr[1] waddr[2] waddr[3] waddr[4] waddr[5] a[0] a[1] a[2] a[3] a[4] a[5] a[6] b[0] b[1] b[2] b[3] b[4] b[5] b[6]
.outputs q[0] q[1] q[2] q[3] q[4] q[5] q[6] q[7]
# Start pipeline
# Pipeline a
.subckt shift D=a[0] clk=clk Q=a_st0[0]
.subckt shift D=a_st0[0] clk=clk Q=a_st1[0]
.subckt shift D=a[1] clk=clk Q=a_st0[1]
.subckt shift D=a_st0[1] clk=clk Q=a_st1[1]
.subckt shift D=a[2] clk=clk Q=a_st0[2]
.subckt shift D=a_st0[2] clk=clk Q=a_st1[2]
.subckt shift D=a[3] clk=clk Q=a_st0[3]
.subckt shift D=a_st0[3] clk=clk Q=a_st1[3]
.subckt shift D=a[4] clk=clk Q=a_st0[4]
.subckt shift D=a_st0[4] clk=clk Q=a_st1[4]
.subckt shift D=a[5] clk=clk Q=a_st0[5]
.subckt shift D=a_st0[5] clk=clk Q=a_st1[5]
.subckt shift D=a[6] clk=clk Q=a_st0[6]
.subckt shift D=a_st0[6] clk=clk Q=a_st1[6]
# Pipeline b
.subckt shift D=b[0] clk=clk Q=b_st0[0]
.subckt shift D=b_st0[0] clk=clk Q=b_st1[0]
.subckt shift D=b[1] clk=clk Q=b_st0[1]
.subckt shift D=b_st0[1] clk=clk Q=b_st1[1]
.subckt shift D=b[2] clk=clk Q=b_st0[2]
.subckt shift D=b_st0[2] clk=clk Q=b_st1[2]
.subckt shift D=b[3] clk=clk Q=b_st0[3]
.subckt shift D=b_st0[3] clk=clk Q=b_st1[3]
.subckt shift D=b[4] clk=clk Q=b_st0[4]
.subckt shift D=b_st0[4] clk=clk Q=b_st1[4]
.subckt shift D=b[5] clk=clk Q=b_st0[5]
.subckt shift D=b_st0[5] clk=clk Q=b_st1[5]
.subckt shift D=b[6] clk=clk Q=b_st0[6]
.subckt shift D=b_st0[6] clk=clk Q=b_st1[6]
# Pipeline waddr
.subckt shift D=waddr[0] clk=clk Q=waddr_st0[0]
.subckt shift D=waddr_st0[0] clk=clk Q=waddr_st1[0]
.subckt shift D=waddr[1] clk=clk Q=waddr_st0[1]
.subckt shift D=waddr_st0[1] clk=clk Q=waddr_st1[1]
.subckt shift D=waddr[2] clk=clk Q=waddr_st0[2]
.subckt shift D=waddr_st0[2] clk=clk Q=waddr_st1[2]
.subckt shift D=waddr[3] clk=clk Q=waddr_st0[3]
.subckt shift D=waddr_st0[3] clk=clk Q=waddr_st1[3]
.subckt shift D=waddr[4] clk=clk Q=waddr_st0[4]
.subckt shift D=waddr_st0[4] clk=clk Q=waddr_st1[4]
.subckt shift D=waddr[5] clk=clk Q=waddr_st0[5]
.subckt shift D=waddr_st0[5] clk=clk Q=waddr_st1[5]
# Pipeline wen
.subckt shift D=wen clk=clk Q=wen_st0
.subckt shift D=wen_st0 clk=clk Q=wen_st1
# End pipeline
# Start adder
.subckt adder a=a_st1[0] b=b_st1[0] cin=zero00 cout=cint01 sumout=AplusB[0]
.subckt adder a=a_st1[1] b=b_st1[1] cin=cint01 cout=cint02 sumout=AplusB[1]
.subckt adder a=a_st1[2] b=b_st1[2] cin=cint02 cout=cint03 sumout=AplusB[2]
.subckt adder a=a_st1[3] b=b_st1[3] cin=cint03 cout=cint04 sumout=AplusB[3]
.subckt adder a=a_st1[4] b=b_st1[4] cin=cint04 cout=cint05 sumout=AplusB[4]
.subckt adder a=a_st1[5] b=b_st1[5] cin=cint05 cout=cint06 sumout=AplusB[5]
.subckt adder a=a_st1[6] b=b_st1[6] cin=cint06 cout=cint07 sumout=AplusB[6]
.subckt adder a=zero00 b=zero00 cin=cint07 cout=unconn sumout=AplusB[7]
# End adder
# Start DPRAM
# Dual-port RAM instance: only waddr/raddr bits 0-5 and d_in/d_out bits 0-7
# carry design signals; every other input is tied to the zero00 net and every
# other output is connected to unconn.
.subckt dpram clk=clk wen=wen_st1 ren=ren \
waddr[0]=waddr_st1[0] waddr[1]=waddr_st1[1] waddr[2]=waddr_st1[2] waddr[3]=waddr_st1[3] waddr[4]=waddr_st1[4] \
waddr[5]=waddr_st1[5] waddr[6]=zero00 waddr[7]=zero00 waddr[8]=zero00 waddr[9]=zero00 waddr[10]=zero00 \
raddr[0]=raddr[0] raddr[1]=raddr[1] raddr[2]=raddr[2] raddr[3]=raddr[3] raddr[4]=raddr[4] raddr[5]=raddr[5] \
raddr[6]=zero00 raddr[7]=zero00 raddr[8]=zero00 raddr[9]=zero00 raddr[10]=zero00 \
d_in[0]=AplusB[0] d_in[1]=AplusB[1] d_in[2]=AplusB[2] d_in[3]=AplusB[3] d_in[4]=AplusB[4] d_in[5]=AplusB[5] \
d_in[6]=AplusB[6] d_in[7]=AplusB[7] d_in[8]=zero00 d_in[9]=zero00 d_in[10]=zero00 d_in[11]=zero00 \
d_in[12]=zero00 d_in[13]=zero00 d_in[14]=zero00 d_in[15]=zero00 d_in[16]=zero00 d_in[17]=zero00 \
d_in[18]=zero00 d_in[19]=zero00 d_in[20]=zero00 d_in[21]=zero00 d_in[22]=zero00 d_in[23]=zero00 \
d_in[24]=zero00 d_in[25]=zero00 d_in[26]=zero00 d_in[27]=zero00 d_in[28]=zero00 d_in[29]=zero00 \
d_in[30]=zero00 d_in[31]=zero00 \
d_in[32]=zero00 d_in[33]=zero00 d_in[34]=zero00 d_in[35]=zero00 d_in[36]=zero00 d_in[37]=zero00 d_in[38]=zero00 d_in[39]=zero00 d_in[40]=zero00 d_in[41]=zero00 d_in[42]=zero00 d_in[43]=zero00 d_in[44]=zero00 d_in[45]=zero00 d_in[46]=zero00 d_in[47]=zero00 d_in[48]=zero00 d_in[49]=zero00 d_in[50]=zero00 d_in[51]=zero00 d_in[52]=zero00 d_in[53]=zero00 d_in[54]=zero00 d_in[55]=zero00 d_in[56]=zero00 d_in[57]=zero00 d_in[58]=zero00 d_in[59]=zero00 d_in[60]=zero00 d_in[61]=zero00 d_in[62]=zero00 d_in[63]=zero00 \
d_out[0]=q[0] d_out[1]=q[1] d_out[2]=q[2] d_out[3]=q[3] d_out[4]=q[4] d_out[5]=q[5] \
d_out[6]=q[6] d_out[7]=q[7] d_out[8]=unconn d_out[9]=unconn d_out[10]=unconn \
d_out[11]=unconn d_out[12]=unconn d_out[13]=unconn d_out[14]=unconn d_out[15]=unconn \
d_out[16]=unconn d_out[17]=unconn d_out[18]=unconn d_out[19]=unconn d_out[20]=unconn \
d_out[21]=unconn d_out[22]=unconn d_out[23]=unconn d_out[24]=unconn d_out[25]=unconn \
d_out[26]=unconn d_out[27]=unconn d_out[28]=unconn d_out[29]=unconn d_out[30]=unconn d_out[31]=unconn \
d_out[32]=unconn d_out[33]=unconn d_out[34]=unconn d_out[35]=unconn d_out[36]=unconn d_out[37]=unconn d_out[38]=unconn d_out[39]=unconn d_out[40]=unconn d_out[41]=unconn d_out[42]=unconn d_out[43]=unconn d_out[44]=unconn d_out[45]=unconn d_out[46]=unconn d_out[47]=unconn d_out[48]=unconn d_out[49]=unconn d_out[50]=unconn d_out[51]=unconn d_out[52]=unconn d_out[53]=unconn d_out[54]=unconn d_out[55]=unconn d_out[56]=unconn d_out[57]=unconn d_out[58]=unconn d_out[59]=unconn d_out[60]=unconn d_out[61]=unconn d_out[62]=unconn d_out[63]=unconn
# End DPRAM
# Start global variable
.names zero00
0
# End global variable
.end
# Start blackbox definition
.model dpram
.inputs clk wen ren waddr[0] waddr[1] waddr[2] waddr[3] waddr[4] waddr[5] \
waddr[6] waddr[7] waddr[8] waddr[9] waddr[10] raddr[0] raddr[1] raddr[2] \
raddr[3] raddr[4] raddr[5] raddr[6] raddr[7] raddr[8] raddr[9] raddr[10] \
d_in[0] d_in[1] d_in[2] d_in[3] d_in[4] d_in[5] d_in[6] d_in[7] d_in[8] \
d_in[9] d_in[10] d_in[11] d_in[12] d_in[13] d_in[14] d_in[15] d_in[16] \
d_in[17] d_in[18] d_in[19] d_in[20] d_in[21] d_in[22] d_in[23] d_in[24] \
d_in[25] d_in[26] d_in[27] d_in[28] d_in[29] d_in[30] d_in[31] d_in[32] \
d_in[33] d_in[34] d_in[35] d_in[36] d_in[37] d_in[38] d_in[39] d_in[40] \
d_in[41] d_in[42] d_in[43] d_in[44] d_in[45] d_in[46] d_in[47] d_in[48] \
d_in[49] d_in[50] d_in[51] d_in[52] d_in[53] d_in[54] d_in[55] d_in[56] \
d_in[57] d_in[58] d_in[59] d_in[60] d_in[61] d_in[62] d_in[63]
.outputs d_out[0] d_out[1] d_out[2] d_out[3] d_out[4] d_out[5] d_out[6] \
d_out[7] d_out[8] d_out[9] d_out[10] d_out[11] d_out[12] d_out[13] \
d_out[14] d_out[15] d_out[16] d_out[17] d_out[18] d_out[19] d_out[20] \
d_out[21] d_out[22] d_out[23] d_out[24] d_out[25] d_out[26] d_out[27] \
d_out[28] d_out[29] d_out[30] d_out[31] d_out[32] d_out[33] d_out[34] \
d_out[35] d_out[36] d_out[37] d_out[38] d_out[39] d_out[40] d_out[41] \
d_out[42] d_out[43] d_out[44] d_out[45] d_out[46] d_out[47] d_out[48] \
d_out[49] d_out[50] d_out[51] d_out[52] d_out[53] d_out[54] d_out[55] \
d_out[56] d_out[57] d_out[58] d_out[59] d_out[60] d_out[61] d_out[62] \
d_out[63]
.blackbox
.end
.model adder
.inputs a b cin
.outputs cout sumout
.blackbox
.end
.model shift
.inputs D clk
.outputs Q
.blackbox
.end
# End blackbox definition

View File

@ -0,0 +1,63 @@
/////////////////////////////////////
// //
// ERI summit demo-benchmark //
// pipelined_8b_adder.v //
// by Aurelien //
// //
/////////////////////////////////////
`timescale 1 ns/ 1 ps
// Reference model of the demo benchmark.
// The 7-bit operands a and b, the write address and the write enable each go
// through two register stages; the 8-bit sum of the delayed operands is
// written to a RAM at the delayed write address when the delayed write enable
// is high. When ren is high, the word at raddr is registered onto q.
module pipelined_8b_adder(
  clk,
  raddr,
  waddr,
  ren,
  wen,
  a,
  b,
  q );

  input clk;
  input[5:0] raddr;
  input[5:0] waddr;
  input ren;
  input wen;
  input[6:0] a;
  input[6:0] b;
  output[7:0] q;

  // 64 words of 8 bits: one word per 6-bit address, each holding an 8-bit sum.
  // (The dimensions used to be swapped - 8 words of 64 bits - which left
  // addresses 8..63 outside the array, so those reads returned X.)
  reg[7:0] ram[63:0];

  // Two-stage pipeline registers
  reg[6:0] a_st0;
  reg[6:0] a_st1;
  reg[6:0] b_st0;
  reg[6:0] b_st1;
  // Declared 9 bits wide although waddr is 6 bits: the upper bits are the
  // zero extension of waddr.
  reg[8:0] waddr_st0;
  reg[8:0] waddr_st1;
  reg wen_st0;
  reg wen_st1;
  reg[7:0] q_int;

  wire[7:0] AplusB;

  // 7-bit + 7-bit evaluated at the 8-bit width of AplusB, so the carry is kept
  assign AplusB = a_st1 + b_st1;
  assign q = q_int;

  always@(posedge clk) begin
    waddr_st0 <= waddr;
    waddr_st1 <= waddr_st0;
    a_st0 <= a;
    a_st1 <= a_st0;
    b_st0 <= b;
    b_st1 <= b_st0;
    wen_st0 <= wen;
    wen_st1 <= wen_st0;
    // Write port: uses the delayed enable/address so they line up with AplusB
    if(wen_st1) begin
      ram[waddr_st1] <= AplusB;
    end
    // Read port: not pipelined, uses raddr/ren directly
    if(ren) begin
      q_int <= ram[raddr];
    end
  end
endmodule

View File

@ -0,0 +1,219 @@
`timescale 1 ns/ 100 ps
`include "OPENFPGAPATHKEYWORD/ERI_demo/pipelined_8b_adder.v"
module pipelined_8b_adder_top_formal_verification_random_tb();
reg clk;
reg[5:0] raddr;
reg[5:0] waddr;
reg ren;
reg wen;
reg[6:0] a;
reg[6:0] b;
wire[7:0] q_gfpga;
wire[7:0] q_bench;
reg[7:0] q_flag;
pipelined_8b_adder_top_formal_verification DUT(
.clk_fm (clk),
.raddr_0__fm (raddr[0]),
.raddr_1__fm (raddr[1]),
.raddr_2__fm (raddr[2]),
.raddr_3__fm (raddr[3]),
.raddr_4__fm (raddr[4]),
.raddr_5__fm (raddr[5]),
.waddr_0__fm (waddr[0]),
.waddr_1__fm (waddr[1]),
.waddr_2__fm (waddr[2]),
.waddr_3__fm (waddr[3]),
.waddr_4__fm (waddr[4]),
.waddr_5__fm (waddr[5]),
.ren_fm (ren),
.wen_fm (wen),
.a_0__fm (a[0]),
.a_1__fm (a[1]),
.a_2__fm (a[2]),
.a_3__fm (a[3]),
.a_4__fm (a[4]),
.a_5__fm (a[5]),
.a_6__fm (a[6]),
.b_0__fm (b[0]),
.b_1__fm (b[1]),
.b_2__fm (b[2]),
.b_3__fm (b[3]),
.b_4__fm (b[4]),
.b_5__fm (b[5]),
.b_6__fm (b[6]),
.out_q_0__fm (q_gfpga[0]),
.out_q_1__fm (q_gfpga[1]),
.out_q_2__fm (q_gfpga[2]),
.out_q_3__fm (q_gfpga[3]),
.out_q_4__fm (q_gfpga[4]),
.out_q_5__fm (q_gfpga[5]),
.out_q_6__fm (q_gfpga[6]),
.out_q_7__fm (q_gfpga[7])
);
pipelined_8b_adder ref0(
.clk (clk),
.raddr (raddr),
.waddr (waddr),
.ren (ren),
.wen (wen),
.a (a),
.b (b),
.q (q_bench)
);
integer nb_error = 0;
integer count = 0;
integer lim_max = 64 - 1;
integer write_complete = 0;
//----- Initialization
// Drives every stimulus register to a known value at time 0, then generates
// the clock forever.
initial begin
  clk <= 1'b0;
  a <= 7'h00;
  b <= 7'h00;
  wen <= 1'b0;
  ren <= 1'b0;
  // waddr and raddr are declared reg[5:0]; use literals of the same width
  // instead of 9-bit ones that were silently truncated.
  waddr <= 6'h00;
  raddr <= 6'h00;
  // Free-running clock: toggles every 2.5 time units
  while(1) begin
    #2.5
    clk <= !clk;
  end
end
//----- Input Stimulis
always@(negedge clk) begin
if(write_complete == 0) begin
wen <= 1'b1;
ren <= 1'b0;
count <= count + 1;
waddr <= waddr + 1;
if(count == lim_max) begin
write_complete = 1;
end
end else begin
wen <= $random;
ren <= $random;
waddr <= $random;
raddr <= $random;
end
a <= $random;
b <= $random;
end
always@(negedge clk) begin
if(!(q_gfpga[0] === q_bench[0]) && !(q_bench[0] === 1'bx)) begin
q_flag[0] <= 1'b1;
end else begin
q_flag[0] <= 1'b0;
end
if(!(q_gfpga[1] === q_bench[1]) && !(q_bench[1] === 1'bx)) begin
q_flag[1] <= 1'b1;
end else begin
q_flag[1] <= 1'b0;
end
if(!(q_gfpga[2] === q_bench[2]) && !(q_bench[2] === 1'bx)) begin
q_flag[2] <= 1'b1;
end else begin
q_flag[2] <= 1'b0;
end
if(!(q_gfpga[3] === q_bench[3]) && !(q_bench[3] === 1'bx)) begin
q_flag[3] <= 1'b1;
end else begin
q_flag[3] <= 1'b0;
end
if(!(q_gfpga[4] === q_bench[4]) && !(q_bench[4] === 1'bx)) begin
q_flag[4] <= 1'b1;
end else begin
q_flag[4] <= 1'b0;
end
if(!(q_gfpga[5] === q_bench[5]) && !(q_bench[5] === 1'bx)) begin
q_flag[5] <= 1'b1;
end else begin
q_flag[5] <= 1'b0;
end
if(!(q_gfpga[6] === q_bench[6]) && !(q_bench[6] === 1'bx)) begin
q_flag[6] <= 1'b1;
end else begin
q_flag[6] <= 1'b0;
end
if(!(q_gfpga[7] === q_bench[7]) && !(q_bench[7] === 1'bx)) begin
q_flag[7] <= 1'b1;
end else begin
q_flag[7] <= 1'b0;
end
end
always@(posedge q_flag[0]) begin
if(q_flag[0]) begin
nb_error = nb_error + 1;
$display("Mismatch on q_gfpga[0] at time = %t", $realtime);
end
end
always@(posedge q_flag[1]) begin
if(q_flag[1]) begin
nb_error = nb_error + 1;
$display("Mismatch on q_gfpga[1] at time = %t", $realtime);
end
end
always@(posedge q_flag[2]) begin
if(q_flag[2]) begin
nb_error = nb_error + 1;
$display("Mismatch on q_gfpga[2] at time = %t", $realtime);
end
end
always@(posedge q_flag[3]) begin
if(q_flag[3]) begin
nb_error = nb_error + 1;
$display("Mismatch on q_gfpga[3] at time = %t", $realtime);
end
end
always@(posedge q_flag[4]) begin
if(q_flag[4]) begin
nb_error = nb_error + 1;
$display("Mismatch on q_gfpga[4] at time = %t", $realtime);
end
end
always@(posedge q_flag[5]) begin
if(q_flag[5]) begin
nb_error = nb_error + 1;
$display("Mismatch on q_gfpga[5] at time = %t", $realtime);
end
end
always@(posedge q_flag[6]) begin
if(q_flag[6]) begin
nb_error = nb_error + 1;
$display("Mismatch on q_gfpga[6] at time = %t", $realtime);
end
end
always@(posedge q_flag[7]) begin
if(q_flag[7]) begin
nb_error = nb_error + 1;
$display("Mismatch on q_gfpga[7] at time = %t", $realtime);
end
end
initial begin
$dumpfile("pipelined_8b_adder_formal.vcd");
$dumpvars(1, pipelined_8b_adder_top_formal_verification_random_tb);
end
initial begin
$timeformat(-9, 2, "ns", 20);
$display("Simulation start");
#1500 // Can be changed by the user for his need
if(nb_error == 0) begin
$display("Simulation Succeed");
end else begin
$display("Simulation Failed with %d error(s)", nb_error);
end
$finish;
end
endmodule

View File

@ -1,23 +1,28 @@
# Getting Started with FPGA-SPICE
# Getting Started with OpenFPGA
[![Build Status](https://travis-ci.org/LNIS-Projects/OpenFPGA.svg?branch=master)](https://travis-ci.org/LNIS-Projects/OpenFPGA)
[![Documentation Status](https://readthedocs.org/projects/openfpga/badge/?version=master)](https://openfpga.readthedocs.io/en/master/?badge=master)
## Introduction
FPGA-SPICE is an extension to VPR. It is an IP Verilog Generator allowing reliable and fast testing of heterogeneous architectures.
OpenFPGA is an extension to VPR. It is an IP Verilog Generator allowing reliable and fast testing of homogeneous architectures.
## Compilation
The different ways of compiling can be found in the **./compilation** folder.
The different ways of compiling can be found in the **./compilation** folder.
We currently implemented it for:
**Compilation steps:**
1. Create a folder named build in the OpenFPGA repository (mkdir build && cd build)
2. Create Makefile in this folder using cmake (cmake ..)
3. Compile the tool and its dependencies (make)
1. Ubuntu 18.04
2. Red Hat 7.5
3. MacOS High Sierra 10.13.4
*We currently implemented OpenFPGA for:*
Please note that those were the versions we tested the software for. It might work with earlier versions and other distributions.
*1. Ubuntu 18.04*
*2. Red Hat 7.5*
*3. MacOS High Sierra 10.13.4*
*Please note that those were the versions we tested the software for. It might work with earlier versions and other distributions.*
## Documentation
OpenFPGA's [full documentation](https://openfpga.readthedocs.io/en/master/) includes tutorials, descriptions of the design flow, and tool options.

View File

@ -32,9 +32,9 @@ This will show the different options that can be used. Our modifications concern
A script is already prepared in the folder to test FPGA-SPICE and FPGA-Verilog
`source ./go.sh`
`source ./go_fpga_verilog.sh`
This script uses the enhanced version of vpr with some new options such as --fpga_spice_print_top_testbench which automatically generates a testbench for the full FPGA and --fpga_verilog_dir which allows us to choose the destination directory for the verilog output we generate.
This script uses the enhanced version of vpr with some new options such as --fpga_verilog_print_top_testbench which automatically generates a testbench for the full FPGA and --fpga_verilog_dir which allows us to choose the destination directory for the verilog output we generate.
For more information on how the new commands work, please visit [OpenFPGA Options FPGA-SPICE](https://openfpga.readthedocs.io/en/latest/fpga_spice/command_line_usage.html).
As a result, we get a new folder, /verilog_test, which contains the verilog code. The name_top.v contains the full FPGA we just created. Three other folders are created, *lb*, *routing* and *sub_modules*. *lb* contains the different CLBs used in the architecture. *routing* contains the different connection blocks, the switch boxes and the wires. *sub_modules* contains the different modules needed in the architecture.

View File

@ -32,9 +32,9 @@ This will show the different options that can be used. Our modifications concern
A script is already prepared in the folder to test FPGA-SPICE and FPGA-Verilog
`source ./go.sh`
`source ./go_fpga_verilog.sh`
This script uses the enhanced version of vpr with some new options such as --fpga_spice_print_top_testbench which automatically generates a testbench for the full FPGA and --fpga_verilog_dir which allows us to choose the destination directory for the verilog output we generate.
This script uses the enhanced version of vpr with some new options such as --fpga_verilog_print_top_testbench which automatically generates a testbench for the full FPGA and --fpga_verilog_dir which allows us to choose the destination directory for the verilog output we generate.
For more information on how the new commands work, please visit [OpenFPGA Options FPGA-SPICE](https://openfpga.readthedocs.io/en/latest/fpga_spice/command_line_usage.html).
As a result, we get a new folder, /verilog_test, which contains the verilog code. The name_top.v contains the full FPGA we just created. Three other folders are created, *lb*, *routing* and *sub_modules*. *lb* contains the different CLBs used in the architecture. *routing* contains the different connection blocks, the switch boxes and the wires. *sub_modules* contains the different modules needed in the architecture.

View File

@ -29,9 +29,9 @@ This will show the different options that can be used. Our modifications concern
A script is already prepared in the folder to test FPGA-SPICE and FPGA-Verilog
`source ./go.sh`
`source ./go_fpga_verilog.sh`
This script uses the enhanced version of vpr with some new options such as --fpga_spice_print_top_testbench which automatically generates a testbench for the full FPGA and --fpga_verilog_dir which allows us to choose the destination directory for the verilog output we generate.
This script uses the enhanced version of vpr with some new options such as --fpga_verilog_print_top_testbench which automatically generates a testbench for the full FPGA and --fpga_verilog_dir which allows us to choose the destination directory for the verilog output we generate.
For more information on how the new commands work, please visit [OpenFPGA Options FPGA-SPICE](https://openfpga.readthedocs.io/en/latest/fpga_spice/command_line_usage.html).
As a result, we get a new folder, /verilog_test, which contains the verilog code. The name_top.v contains the full FPGA we just created. Three other folders are created, *lb*, *routing* and *sub_modules*. *lb* contains the different CLBs used in the architecture. *routing* contains the different connection blocks, the switch boxes and the wires. *sub_modules* contains the different modules needed in the architecture.

View File

@ -18,7 +18,6 @@ FPGA-Bitstream is the part of the flow in charge of the functional verification
How to compile
==============
Running the Makefile in the root of the released package can compile all the source codes.
Guides can be found in the *compilation* directory in the main folder. We tested it for MacOS High Sierra 10.13.4, Ubuntu 18.04 and Red Hat 7.5. This list is not exhaustive as other distributions could work as well.
As a general rule, the compilation follows these steps:
@ -31,21 +30,24 @@ If you need the full flow:
2) Go into the folder you just cloned and make the different submodules through a global Makefile:
cd OpenFPGA
make
OR
make -j
(if you have multiple cores, this will make the compilation way faster.)
mkdir build (*if folder doesn't already exist*)
cd build
cmake ..
make OR make -j (*if you have multiple cores, this will make the compilation way faster*)
If you only need vpr:
cd OpenFPGA/vpr7_x2p
make/make -j
cd OpenFPGA
mkdir build (if folder doesn't already exist)
cd build
cmake ..
make vpr/make vpr -j
3) Architectures, circuits and already written scripts exist to allow you to test the flow without having to provide any new information to the system. For this:
cd vpr7_x2p (if not done already)
cd vpr7_x2p
cd vpr
source ./go.sh
source ./go_fpga_verilog/spice.sh
go.sh is a script linking to a testing architecture and a simple circuit. The output will be in the folders spice_demo and verilog_demo.
They are scripts linking to a testing architecture and a simple circuit.
4) If you only need to see the new options implemented in vpr, do:
./vpr

View File

@ -8,11 +8,12 @@ FPGA-Verilog Supported Options::
--fpga_verilog
--fpga_verilog_dir <directory_path_of_dumped_verilog_files>
--fpga_verilog_include_timing
--fpga_verilog_init_sim
--fpga_verilog_print_modelsim_autodeck
--fpga_verilog_modelsim_ini_path <string>
--fpga_verilog_include_signal_init
--fpga_verilog_print_modelsim_autodeck <modelsim_ini_path>
--fpga_verilog_print_top_testbench
--fpga_verilog_print_top_auto_testbench <path_to_the_verilog_benchmark>
--fpga_verilog_print_autocheck_top_testbench <reference_verilog_file_path>
--fpga_verilog_print_formal_verification_top_netlist
--fpga_verilog_include_icarus_simulator
.. csv-table:: Command-line Options of FPGA-Verilog
@ -20,7 +21,8 @@ FPGA-Verilog Supported Options::
:widths: 15, 30
"--fpga_verilog", "Turn on the FPGA-Verilog."
"--fpga_verilog_dir <dir_path>", "Specify the directory that all the Verilog files will be outputted to. <dir_path> is the destination directory."
<<<<<<< HEAD
"--fpga_verilog_dir <dir_path>", "Specify the directory that all the Verilog files will be outputted to <dir_path> is the destination directory."
"--fpga_verilog_include_timing", "Includes the timings found in the XML file."
"--fpga_verilog_init_sim", "Initializes the simulation for ModelSim."
"--fpga_verilog_print_modelsim_autodeck", "Generates the scripts necessary to the ModelSim simulation."
@ -28,6 +30,17 @@ FPGA-Verilog Supported Options::
"--fpga_verilog_print_top_testbench", "Print the full-chip-level testbench for the FPGA. Determines the type of autodeck."
"--fpga_verilog_print_top_auto_testbench \
<path_to_the_verilog_benchmark>", "Prints the testbench associated with the given benchmark. Determines the type of autodeck."
=======
"--fpga_verilog_dir <dir_path>", "Specify the directory where all the Verilog files will be outputted to. <dir_path> is the destination directory."
"--fpga_verilog_include_timing", "Includes the timings found in the XML architecture description file."
"--fpga_verilog_include_signal_init", "Sets all nets to a random value to be close to a real power-on case"
"--fpga_verilog_print_modelsim_autodeck <modelsim_ini_path>", "Generates the scripts necessary to the ModelSim simulation and specify the path to modelsim.ini file."
"--fpga_verilog_print_top_testbench", "Prints the full-chip-level testbench for the FPGA, which includes programming phase and operating phase (random patterns)."
"--fpga_verilog_print_autocheck_top_testbench \
<reference_verilog_file_path>", "Prints a testbench stimulating the generated FPGA and the initial benchmark to compare stimuli responses, which includes programming phase and operating phase (random patterns)"
"--fpga_verilog_print_formal_verification_top_netlist", "Prints a Verilog top file compliant with formal verification tools. With this top file the FPGA is initially programmed. It also prints a testbench with random patterns, which can be manually or automatically checked regarding previous options."
"--fpga_verilog_include_icarus_simulator", "Activates waveforms .vcd file generation and simulation timeout, which are required for Icarus Verilog simulator"
>>>>>>> f56adc681567b73c7826228641e089482dffc009
.. note:: The selected directory will contain the *Verilog top file* and three other folders. The folders are:

View File

@ -1,15 +1,20 @@
Hierarchy of Verilog Output Files
=================================
All the generated Verilog Netlists are located in the <verilog_dir> as you specify in the command-line options. Under the <verilog_dir>, FPGA-Verilog creates the top file name_top.v and some folders: lb (logic blocks), routing and sub_modules.
All the generated Verilog Netlists are located in the <verilog_dir>/SRC as you specify in the command-line options. Under the <verilog_dir>/SRC, FPGA-Verilog creates the top file name_top.v and some folders: lb (logic blocks), routing and sub_modules.
.. csv-table:: Folder hierarchy of FPGA-Verilog
:header: "Folder", "Content"
:header: "File/Folder", "Content"
:widths: 10, 20
"name_top.v", "Contains the top module and calls all the other .v files"
"name.bitstream", "Only if --fpga_verilog_print_top_testbench or --fpga_verilog_print_top_auto_testbench is chosen. Contains the bitstream programming the generated FPGA."
"name_top_tb.v", "Only if --fpga_verilog_print_top_testbench or --fpga_verilog_print_top_auto_testbench is chosen. Contains the testbench used for the simulation."
"name_top_tb.v", "Only if --fpga_verilog_print_top_testbench. Contains a testbench used for the simulation."
"name_autocheck_top_tb.v", "Only if --fpga_verilog_print_autocheck_top_testbench is chosen. Contains a testbench used for the simulation."
"name_formal_random_top_tb.v", "Only if --fpga_verilog_print_formal_verification_top_netlist is chosen. Contains a testbench used for the simulation."
"name_top_formal_verification.v", "Only if --fpga_verilog_print_formal_verification_top_netlist is chosen. Contains a top fil used for formal verification and by name_formal_random_top_tb.v."
"fpga_defines.v", "Contains all the defines set as 'include_timing'"
"name_include_netlists.v", "Contains all the netlists and defines paths used for the simulation."
"lb", "Logic Block. Contains all the CLBs. The logic_block.v includes all the CLB and is called by the top module afterward."
"routing", "Contains all the routing in the circuit. You can find in it the Switch Boxes, the Connection Blocks and the routing needed to connect the different blocks. The routing.v file packs them all and is called by the top module."
"sub_modules", "Contains the modules generated by the flow to build the CLBs."

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
lattice_ultra_example_source/

View File

@ -0,0 +1,71 @@
#!/bin/bash
# Runs the OpenFPGA flow (scripts/fpga_flow.pl) for one regression task.
#
# Usage: <this script> [task_name]
#   task_name  Selects configs/<task_name>.conf and
#              benchmarks/List/<task_name>.txt (default: lattice_benchmark).
#
# Must be executed from the OpenFPGA/fpga_flow folder: every path below is
# derived from $PWD.

# Exit if error occurs
set -e

# Make sure a clear start
default_task='lattice_benchmark'
pwd_path="$PWD"
task_name=${1:-$default_task} # run task defined in argument else run default task
config_file="$PWD/configs/${task_name}.conf"
bench_txt="$PWD/benchmarks/List/${task_name}.txt"
rpt_file="$PWD/csv_rpts/fpga_spice/${task_name}.csv"
# NOTE(review): task_file is not used anywhere in this script; kept as-is.
task_file="$PWD/vpr_fpga_spice_task_lists/${task_name}"
verilog_path="${PWD}/regression_${task_name}"
# Strip only the trailing ".conf" so a ".conf" substring elsewhere in the
# path (e.g. a ".config" directory) is never rewritten.
config_file_final="${config_file%.conf}_final.conf"

# List of argument passed to FPGA flow.
# One word per array element so that "${vpr_config_flags[@]}" expands to the
# exact argument vector, without relying on word splitting.
vpr_config_flags=(
  -N 10
  -K 6
  -ace_d 0.5
  -multi_thread 1
  -vpr_fpga_x2p_rename_illegal_port
  -vpr_fpga_verilog
  -vpr_fpga_verilog_dir "$verilog_path"
  -vpr_fpga_bitstream_generator
  -vpr_fpga_verilog_print_autocheck_top_testbench
  -vpr_fpga_verilog_include_timing
  -vpr_fpga_verilog_include_signal_init
  -vpr_fpga_verilog_formal_verification_top_netlist
  -fix_route_chan_width
  -vpr_fpga_verilog_include_icarus_simulator
  -power
)
# vpr_config_flags+=("$@") # Append provided arguments

#=============== Argument Sanity Check =====================
# Start from a known state so an 'exitflag' inherited from the environment
# cannot abort the run.
exitflag=''

#Check if script running in correct (OpenFPGA/fpga_flow) folder
if [[ "$pwd_path" != *"OpenFPGA/fpga_flow"* ]]; then
  echo "Error : Execute script from OpenFPGA/fpga_flow project folder"
  exitflag=1
fi

#Check if fconfig and benchmark_list file exists
for filepath in "$config_file" "$bench_txt"; do
  if [[ ! -f "$filepath" ]]; then
    echo "$filepath File not found!"
    exitflag=1
  fi
done

if [[ -n "$exitflag" ]]; then
  echo "Terminating script . . . . . . "
  exit 1
fi
#=======================================================

#======== Replace variables in config file =============
#Extract OpenFPGA Project Path and Escape
openfpga_root="$(printf '%s\n' "$pwd_path" | sed 's/.OpenFPGA.*$//')/OpenFPGA"
# Escape every character that is special in a sed replacement (\ / &).
OPENFPGAPATHKEYWORD=$(printf '%s\n' "$openfpga_root" | sed 's|[\\/&]|\\&|g')

# Create final config file with replaced keywords replaced variables
sed "s/OPENFPGAPATHKEYWORD/${OPENFPGAPATHKEYWORD}/g" "$config_file" >"$config_file_final"

#==================Clean result, change directory and execute ===============
cd "${pwd_path}/scripts"

perl fpga_flow.pl \
  -conf "$config_file_final" \
  -benchmark "$bench_txt" \
  -rpt "$rpt_file" \
  "${vpr_config_flags[@]}"

# Only reached when fpga_flow.pl succeeded (set -e aborts otherwise).
echo "Netlists successfully generated and simulated"
exit 0

File diff suppressed because it is too large Load Diff

View File

@ -29,7 +29,7 @@ sub print_usage()
print " -add_default_clk\n";
print " -initial_blif <input_blif_path>\n";
print "\n";
return 1;
return 0;
}
sub opts_read()
@ -53,7 +53,7 @@ sub opts_read()
}
}
}
return 1;
return 0;
}
# Print a line of blif netlist
@ -432,15 +432,15 @@ sub scan_blif()
}
close($FIN2);
close($FOUT);
return 1;
return 0;
}
sub main()
{
&opts_read();
&scan_blif();
return 1;
return 0;
}
&main();
exit(1);
exit(0);

View File

@ -32,17 +32,17 @@ sub opts_read()
if ($#ARGV == -1){
print "Error: Not enough input argument!\n";
&print_usage();
exit(1);
exit(1);
} else {
for (my $iargv = 0; $iargv < $#ARGV+1; $iargv++){
if ("-i" eq $ARGV[$iargv]){
if ("-i" eq $ARGV[$iargv]){
$arch_file = $ARGV[$iargv+1];
$iargv++;
} elsif ("-o" eq $ARGV[$iargv]){
} elsif ("-o" eq $ARGV[$iargv]){
$new_arch_file = $ARGV[$iargv+1];
$overwrite = "FALSE";
$iargv++;
} elsif ("-k" eq $ARGV[$iargv]){
} elsif ("-k" eq $ARGV[$iargv]){
$keyword = $ARGV[$iargv+1];
$change_to = $ARGV[$iargv+2];
$default_keyword = "FALSE";
@ -77,8 +77,8 @@ sub save_original($)
my ($template) = @_;
my $renamed_template = "$template".".bak";
rename($template, $renamed_template);
return $renamed_template;
return $renamed_template;
}
sub findPath(){
@ -90,7 +90,7 @@ sub findPath(){
} else {
$path = "$path"."/"."$folders[$count]";
if($folders[$count] eq $folder_top){
print "$path\n";
#print "$path\n";
return $path;
}
}
@ -103,7 +103,7 @@ sub rewrite_file($ $)
my ($arch, $template) = @_;
open(IN, '<'.$template);
open(OUT, '>'.$arch);
if($default_keyword eq "TRUE"){
my $myPath = &findPath();
while(<IN>){
@ -125,7 +125,7 @@ sub main()
my $rewrite_needed = &rewriting_required_check($arch_file);
if($rewrite_needed == 1){
if($overwrite eq "TRUE"){
my $template_file = &save_original($arch_file);
my $template_file = &save_original($arch_file);
&rewrite_file($arch_file, $template_file);
} else {
&rewrite_file($new_arch_file, $arch_file);
@ -133,6 +133,6 @@ sub main()
}
return;
}
&main();
exit(1);
exit(0);

1
fpga_flow/tech/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
winbond90nm

2
run_local.bat Executable file
View File

@ -0,0 +1,2 @@
REM Launch ./go_ganesh.sh inside the goreganesh/open_fpga Docker image.
REM   -it   interactive session with a terminal attached
REM   --rm  remove the container once the command exits
REM   -v    mount the current Windows directory (%cd%) at /localfile
REM   -w    start in /localfile/vpr7_x2p/vpr inside the container
docker run -it --rm -v "%cd%":/localfile -w="/localfile/vpr7_x2p/vpr" goreganesh/open_fpga ./go_ganesh.sh
REM Keep the console window open until a key is pressed so the output can be read.
pause

View File

@ -931,9 +931,12 @@ struct s_arch {
bool tileable; /* Xifan TANG: tileable rr_graph support */
t_chan_width_dist Chans;
enum e_switch_block_type SBType;
enum e_switch_block_type SBSubType;
float R_minW_nmos;
float R_minW_pmos;
int Fs;
int SubFs;
boolean wire_opposite_side;
float C_ipin_cblock;
float T_ipin_cblock;
/* mrFPGA: Xifan TANG */

View File

@ -1,4 +1,39 @@
/* The XML parser processes an XML file into a tree data structure composed of *
/**********************************************************
* MIT License
*
* Copyright (c) 2018 LNIS - The University of Utah
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
***********************************************************************/
/************************************************************************
* Filename: read_xml_arch_file.c
* Created by: Jason Luu
* Change history:
* +-------------------------------------+
* | Date | Author | Notes
* +-------------------------------------+
* | 2019/07/02 | Xifan Tang | Modified to support passing_track_type for switch blocks
* +-------------------------------------+
***********************************************************************/
/************************************************************************
* The XML parser processes an XML file into a tree data structure composed of *
* ezxml_t nodes. Each ezxml_t node represents an XML element. For example *
* <a> <b/> </a> will generate two ezxml_t nodes. One called "a" and its *
* child "b". Each ezxml_t node can contain various XML data such as attribute *
@ -22,7 +57,7 @@
* Because of how the XML tree traversal works, we free everything when we're *
* done reading an architecture file to make sure that there isn't some part *
* of the architecture file that got missed. *
*/
***********************************************************************/
#include <string.h>
#include <assert.h>
@ -2142,6 +2177,33 @@ static void ProcessDevice(INOUTP ezxml_t Node, OUTP struct s_arch *arch,
arch->Fs = GetIntProperty(Cur, "fs", TRUE, 3);
/* SubType is the switch block type of passing tracks */
/* By default, the subType is the same as the main type */
Prop = FindProperty(Cur, "sub_type", FALSE);
if (NULL != Prop) {
if (strcmp(Prop, "wilton") == 0) {
arch->SBSubType = WILTON;
} else if (strcmp(Prop, "universal") == 0) {
arch->SBSubType = UNIVERSAL;
} else if (strcmp(Prop, "subset") == 0) {
arch->SBSubType = SUBSET;
} else {
vpr_printf(TIO_MESSAGE_ERROR,
"[LINE %d] Unknown property %s for switch block type x\n",
Cur->line, Prop);
exit(1);
}
}
ezxml_set_attr(Cur, "sub_type", NULL);
/* SubFs is Fs for the switch block type of passing tracks */
/* By default, the subFs is the same as the main Fs */
arch->SubFs = GetIntProperty(Cur, "sub_fs", FALSE, arch->Fs);
/* A switch to allow passing tracks wired to the same routing channels */
arch->wire_opposite_side = GetBooleanProperty(Cur, "wire_opposite_side", FALSE, FALSE);
ezxml_set_attr(Cur, "wire_opposite_side", NULL);
FreeNode(Cur);
}
@ -4132,3 +4194,6 @@ void SetupPinEquivalenceAutoDetect(ezxml_t Parent, t_type_descriptor* Type) {
return;
}
/************************************************************************
* End of file : read_xml_arch_file.c
***********************************************************************/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,882 @@
<!--
Flagship Heterogeneous Architecture with Carry Chains for VTR 7.0.
- 40 nm technology
- General purpose logic block:
K = 6, N = 10, fracturable 6 LUTs (can operate as one 6-LUT or two 5-LUTs with all 5 inputs shared)
with optionally registered outputs
Each 5-LUT has an arithmetic mode that converts it to a single-bit adder with both inputs driven by 4-LUTs (both 4-LUTs share all 4 inputs)
Carry chain links to vertically adjacent logic blocks
- Memory size 32 Kbits, memory aspect ratios vary from a data width of 1 to data width of 64.
Height = 6, found on every (8n+2)th column
- Multiplier modes: one 36x36, two 18x18, each 18x18 can also operate as two 9x9.
Height = 4, found on every (8n+6)th column
- Routing architecture: L = 4, fc_in = 0.15, Fc_out = 0.1
Details on Modelling:
The electrical design of the architecture described here is NOT from an
optimized, SPICED architecture. Instead, we attempt to create a reasonable
architecture file by using an existing commercial FPGA to approximate the area,
delay, and power of the underlying components. This is combined with a reasonable 40 nm
model of wiring and circuit design for low-level routing components, where available.
The resulting architecture has delays that roughly match a commercial 40 nm FPGA, but also
has wiring electrical parameters that allow the wire lengths and switch patterns to be
modified and you will still get reasonable delay results for the new architecture.
The following describes, in detail, how we obtained the various electrical values for this
architecture.
Rmin for nmos and pmos, routing buffer sizes, and I/O pad delays are from the ifar
architecture created by Ian Kuon: K06 N10 45nm fc 0.15 area-delay optimized architecture.
(n10k06l04.fc15.area1delay1.cmos45nm.bptm.cmos45nm.xml)
This routing architecture was optimized for 45 nm, and we have scaled it linearly to 40 nm to
match the overall target (a 40 nm FPGA).
We obtain delay numbers by measuring delays of routing, soft logic blocks,
memories, and multipliers from test circuits on a Stratix IV GX device
(EP4SGX230DF29C2X, i.e. fastest speed grade). For routing, we took the average delay of H4 and V4
wires. Rmetal and Cmetal values for the routing wires were obtained from work done by Charles
Chiasson. We use a 96 nm half-pitch (corresponding to mid-level metal stack 40 nm routing) and
take the R and C data from the ITRS roadmap.
For the general purpose logic block, we assume that the area and delays of the Stratix IV
crossbar is close enough to the crossbar modelled here. We use 40 inputs and 20 feedback lines in
the cluster and a full crossbar, leading to 53:1 multiplexers in front of each BLE input.
Stratix IV uses 52 inputs and 20 feedback lines, but only a half-populated crossbar, leading to
36:1 multiplexers. We require 60 such multiplexers, while Stratix IV requires 88 for its more
complex fracturable BLEs + the extra control signals. We justify this rough approximation as follows:
The Stratix IV crossbar has more inputs (72 vs. 60) and
outputs (88 vs. 60) than our full crossbar which should increase its area and delay, but the
Stratix IV crossbar is also 50% sparse (each mux is 36:1 instead of 53:1) which should reduce its
area and delay. The total number of crossbar switch points is roughly similar between the two
architectures (3160 for SIV and 3600 for the academic architecture below), so we use the area
& delay of the Stratix IV crossbar as a rough approximation of our crossbar.
For LUTs, we include LUT
delays measured from Stratix IV which are dependent on the input used (i.e. some
LUT inputs are faster than others). The CAD tools at the time of VTR 7 do
not consider differences in LUT input delays.
Adder delays obtained as approximate values from a Stratix IV EP4SE230F29C3 device.
Delay obtained by compiling a 256 bit adder (registered inputs and outputs,
all pins except clock virtual) then measuring the delays in chip-planner,
sumout delay = 0.271ns to 0.348 ns, intra-block carry delay = 0.011 ns,
inter-block carry delay = 0.327 ns. Given this data, I will approximate
sumout 0.3 ns, intra-block carry-delay = 0.01 ns, and
inter-block carry-delay = 0.16 ns (since Altera inter-block carry delay has
overhead that we don't have, I'll approximate the delay of a simpler chain at
one half what they have. This is very rough, anything from 0.01ns to 0.327ns
can be justified).
Logic block area numbers obtained by scaling overall tile area of a 65nm
Stratix III device, (as given in Wong, Betz and Rose, FPGA 2011) to 40 nm, then subtracting out
routing area at a channel width of 300. We use a channel width of 300 because it can route
all the VTR 6.0 benchmark circuits with an approximately 20% safety margin, and is also close to the
total channel width of Stratix IV. Hence this channel width is close to the commercial practice of
choosing a width that provides high routability. The architecture can be routed at different channel
widths, but we estimate the tile size and hence the physical length of routing wires assuming
a channel width of 300.
Sanity checks employed:
1. We confirmed the routing buffer delay is ~1/3rd of total routing delay at L = 4. This matches
common electrical design.
Authors: Jason Luu, Jeff Goeders, Vaughn Betz
-->
<architecture>
<!--
ODIN II specific config begins
Describes the types of user-specified netlist blocks (in blif, this corresponds to
".model [type_of_block]") that this architecture supports.
Note: Basic LUTs, I/Os, and flip-flops are not included here as there are
already special structures in blif (.names, .input, .output, and .latch)
that describe them.
-->
<models>
<model name="io">
<input_ports>
<port name="outpad"/>
</input_ports>
<output_ports>
<port name="inpad"/>
</output_ports>
</model>
<model name="adder">
<input_ports>
<port name="a"/>
<port name="b"/>
<port name="cin"/>
</input_ports>
<output_ports>
<port name="cout"/>
<port name="sumout"/>
</output_ports>
</model>
<model name="frac_lut6">
<input_ports>
<port name="in"/>
</input_ports>
<output_ports>
<port name="lut6_out"/>
<port name="lut5_out"/>
<port name="lut4_out"/>
</output_ports>
</model>
</models>
<!-- ODIN II specific config ends -->
<!-- Physical descriptions begin -->
<layout width="2" height="2"/>
<spice_settings>
<parameters>
<options sim_temp="25" post="off" captab="off" fast="on"/>
<monte_carlo mc_sim="off" num_mc_points="2" cmos_variation="off" rram_variation="off">
<cmos abs_variation="0.1" num_sigma="3"/>
<rram abs_variation="0.1" num_sigma="3"/>
</monte_carlo>
<measure sim_num_clock_cycle="auto" accuracy="1e-13" accuracy_type="abs">
<slew>
<rise upper_thres_pct="0.95" lower_thres_pct="0.05"/>
<fall upper_thres_pct="0.05" lower_thres_pct="0.95"/>
</slew>
<delay>
<rise input_thres_pct="0.5" output_thres_pct="0.5"/>
<fall input_thres_pct="0.5" output_thres_pct="0.5"/>
</delay>
</measure>
<stimulate>
<clock op_freq="auto" sim_slack="0.2" prog_freq="2.5e6">
<rise slew_time="20e-12" slew_type="abs"/>
<fall slew_time="20e-12" slew_type="abs"/>
</clock>
<input>
<rise slew_time="25e-12" slew_type="abs"/>
<fall slew_time="25e-12" slew_type="abs"/>
</input>
</stimulate>
</parameters>
<tech_lib lib_type="industry" transistor_type="TOP_TT" lib_path="/research/ece/lnis/USERS/tang/tangxifan-eda-tools/branches/subvt_fpga/process/tsmc40nm/toplevel.l" nominal_vdd="0.9" io_vdd="2.5"/>
<transistors pn_ratio="2" model_ref="M">
<nmos model_name="nch" chan_length="40e-9" min_width="140e-9"/>
<pmos model_name="pch" chan_length="40e-9" min_width="140e-9"/>
<io_nmos model_name="nch_25" chan_length="270e-9" min_width="320e-9"/>
<io_pmos model_name="pch_25" chan_length="270e-9" min_width="320e-9"/>
</transistors>
<module_circuit_models>
<circuit_model type="inv_buf" name="inv1" prefix="inv1" is_default="1">
<design_technology type="cmos" topology="inverter" size="1" tapered="off"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="inv_buf" name="buf4" prefix="buf4" is_default="0">
<design_technology type="cmos" topology="buffer" size="1" tapered="on" tap_drive_level="2" f_per_stage="4"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
</circuit_model>
<circuit_model type="inv_buf" name="tap_inv4" prefix="tap_inv4" is_default="0">
<design_technology type="cmos" topology="buffer" size="1" tapered="on" tap_drive_level="3" f_per_stage="4"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
</circuit_model>
<circuit_model type="pass_gate" name="tgate" prefix="tgate" is_default="1">
<design_technology type="cmos" topology="transmission_gate" nmos_size="1" pmos_size="2"/>
<input_buffer exist="off"/>
<output_buffer exist="off"/>
<port type="input" prefix="in" size="1"/>
<port type="input" prefix="sel" size="1"/>
<port type="input" prefix="selb" size="1"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in sel selb" out_port="out">
10e-12 0e-12 0e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in sel selb" out_port="out">
10e-12 0e-12 0e-12
</delay_matrix>
</circuit_model>
<circuit_model type="gate" name="or2" prefix="or2" is_default="1" verilog_netlist="/research/ece/lnis/USERS/alacchi/clone_github/tangxifan-eda-tools/branches/vpr7_rram/vpr/VerilogNetlists/sram.v/VerilogNetlists/essential_gates.v">
<design_technology type="cmos" topology="OR"/>
<input_buffer exist="off"/>
<output_buffer exist="off"/>
<port type="input" prefix="in" size="2"/>
<port type="output" prefix="out" size="1"/>
<delay_matrix type="rise" in_port="in" out_port="out">
10e-12
</delay_matrix>
<delay_matrix type="fall" in_port="in" out_port="out">
10e-12
</delay_matrix>
</circuit_model>
<circuit_model type="chan_wire" name="chan_segment" prefix="track_seg" is_default="1">
<design_technology type="cmos"/>
<input_buffer exist="off"/>
<output_buffer exist="off"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pie" res_val="101" cap_val="22.5e-15" level="1"/> <!-- model_type could be T, res_val and cap_val DON'T CARE -->
</circuit_model>
<circuit_model type="wire" name="direct_interc" prefix="direct_interc" is_default="1">
<design_technology type="cmos"/>
<input_buffer exist="off"/>
<output_buffer exist="off"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<wire_param model_type="pie" res_val="0" cap_val="0" level="1"/> <!-- model_type could be T, res_val cap_val should be defined -->
</circuit_model>
<circuit_model type="mux" name="mux_tree_like" prefix="mux_tree_like" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree-like" add_const_input="true" const_input_val="1"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="inv1"/>
<!--mux2to1 subckt_name="mux2to1"/-->
<pass_gate_logic circuit_model_name="tgate"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<circuit_model type="mux" name="mux_tree_like_tapbuf" prefix="mux_tree_like_tapbuf" dump_structural_verilog="true" is_default="0">
<design_technology type="cmos" structure="tree-like" add_const_input="true" const_input_val="1"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="tap_inv4"/>
<!--mux2to1 subckt_name="mux2to1"/-->
<pass_gate_logic circuit_model_name="tgate"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<circuit_model type="mux" name="mux_1level_tapbuf" prefix="mux_1level_tapbuf" is_default="1" dump_structural_verilog="true">
<design_technology type="cmos" structure="tree-like" add_const_input="true" const_input_val="1"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="tap_inv4"/>
<!--mux2to1 subckt_name="mux2to1"/-->
<pass_gate_logic circuit_model_name="tgate"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="1"/>
<port type="sram" prefix="sram" size="1"/>
</circuit_model>
<!--DFF subckt ports should be defined as <D> <Q> <CLK> <RESET> <SET> -->
<circuit_model type="ff" name="static_dff" prefix="dff" spice_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/SpiceNetlists/ff.sp" verilog_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/VerilogNetlists/ff.v">
<design_technology type="cmos"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="inv1"/>
<pass_gate_logic circuit_model_name="tgate"/>
<port type="input" prefix="D" size="1"/>
<port type="input" prefix="Set" size="1" is_global="true" default_val="0" is_set="true"/>
<port type="input" prefix="Reset" size="1" is_global="true" default_val="1" is_reset="true"/>
<port type="output" prefix="Q" size="1"/>
<port type="clock" prefix="clk" size="1" is_global="true" default_val="0" />
</circuit_model>
<circuit_model type="lut" name="frac_lut6" prefix="frac_lut6" dump_structural_verilog="true">
<design_technology type="cmos" fracturable_lut="true"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="inv1"/>
<lut_input_buffer exist="on" circuit_model_name="buf4"/>
<lut_input_inverter exist="on" circuit_model_name="inv1"/>
<pass_gate_logic circuit_model_name="tgate"/>
<port type="input" prefix="in" size="6" tri_state_map="----11" circuit_model_name="or2"/>
<port type="output" prefix="lut4_out" size="4" lut_frac_level="4" lut_output_mask="0,1,2,3"/>
<port type="output" prefix="lut5_out" size="2" lut_frac_level="5" lut_output_mask="0,1"/>
<port type="output" prefix="lut6_out" size="1" lut_output_mask="0"/>
<port type="sram" prefix="sram" size="64"/>
<port type="sram" prefix="mode" size="2" mode_select="true" circuit_model_name="sc_dff_compact" default_val="1"/>
</circuit_model>
<!--Scan-chain DFF subckt ports should be defined as <D> <Q> <Qb> <CLK> <RESET> <SET> -->
<circuit_model type="sff" name="sc_dff_compact" prefix="scff" spice_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/SpiceNetlists/ff.sp" verilog_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/VerilogNetlists/ff.v">
<design_technology type="cmos"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="inv1"/>
<pass_gate_logic circuit_model_name="tgate"/>
<port type="input" prefix="pReset" size="1" is_global="true" default_val="1" is_reset="true" is_prog="true"/>
<port type="input" prefix="pSet" size="1" is_global="true" default_val="0" is_set="true" is_prog="true"/>
<port type="input" prefix="D" size="1"/>
<port type="output" prefix="Q" size="1"/>
<port type="output" prefix="Qb" size="1"/>
<port type="clock" prefix="prog_clk" size="1" is_global="true" default_val="0" is_prog="true"/>
</circuit_model>
<circuit_model type="iopad" name="iopad" prefix="iopad" spice_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/SpiceNetlists/io.sp" verilog_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/VerilogNetlists/io.v">
<design_technology type="cmos"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="inv1"/>
<pass_gate_logic circuit_model_name="tgate"/>
<port type="inout" prefix="pad" size="1"/>
<port type="sram" prefix="en" size="1" mode_select="true" circuit_model_name="sc_dff_compact" default_val="1"/>
<!--port type="sram" prefix="enb" size="1" mode_select="true" circuit_model_name="sc_dff_compact" default_val="0"/-->
<port type="input" prefix="outpad" size="1"/>
<!-- <port type="input" prefix="zin" size="1" is_global="true" default_val="0" /> -->
<port type="output" prefix="inpad" size="1"/>
</circuit_model>
<!-- Hard logic definition for heterogeneous blocks -->
<circuit_model type="hard_logic" name="adder_1bit" prefix="adder" spice_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/SpiceNetlists/adder.sp" verilog_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/VerilogNetlists/adder.v">
<design_technology type="cmos"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="inv1"/>
<port type="input" prefix="a" size="1"/>
<port type="input" prefix="b" size="1"/>
<port type="input" prefix="cin" size="1"/>
<port type="output" prefix="sumout" size="1"/>
<port type="output" prefix="cout" size="1"/>
</circuit_model>
<circuit_model type="sram" name="sram6T" prefix="sram" spice_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/SpiceNetlists/sram.sp" verilog_netlist="/research/ece/lnis/USERS/alacchi/Current_release/OpenFPGA/vpr7_x2p/vpr/VerilogNetlists/sram.v" >
<design_technology type="cmos"/>
<input_buffer exist="on" circuit_model_name="inv1"/>
<output_buffer exist="on" circuit_model_name="inv1"/>
<pass_gate_logic circuit_model_name="tgate"/>
<port type="input" prefix="in" size="1"/>
<port type="output" prefix="out" size="2"/>
</circuit_model>
</module_circuit_models>
</spice_settings>
<device>
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM
models. We are modifying the delay values however, to include metal C and R, which allows more architecture
experimentation. We are also modifying the relative resistance of PMOS to be 1.8x that of NMOS
(vs. Ian's 3x) as 1.8x lines up with Jeff G's data from a 45 nm process (and is more typical of
45 nm in general). I'm upping the Rmin_nmos from Ian's just over 6k to nearly 9k, and dropping
RminW_pmos from 18k to 16k to hit this 1.8x ratio, while keeping the delays of buffers approximately
lined up with Stratix IV.
We are using Jeff G.'s capacitance data for 45 nm (in tech/ptm_45nm).
Jeff's tables list C in for transistors with widths in multiples of the minimum feature size (45 nm).
The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply drive strength sizes in this file
by 2.5x when looking up in Jeff's tables.
The delay values are lined up with Stratix IV, which has an architecture similar to this
proposed FPGA, and which is also 40 nm
C_ipin_cblock: input capacitance of a track buffer, which VPR assumes is a single-stage
4x minimum drive strength buffer. -->
<sizing R_minW_nmos="8926" R_minW_pmos="16067" ipin_mux_trans_size="9"/>
<timing C_ipin_cblock="596e-18" T_ipin_cblock="77.93e-12"/>
<!-- The grid_logic_tile_area below will be used for all blocks that do not explicitly set their own (non-routing)
area; set to 0 since we explicitly set the area of all blocks currently in this architecture file.
-->
<area grid_logic_tile_area="0"/>
<sram area="6">
<verilog organization="scan-chain" circuit_model_name="sc_dff_compact"/>
<!--verilog organization="memory-bank" circuit_model_name="sram6T_blwl"/-->
<spice organization="standalone" circuit_model_name="sram6T" />
</sram>
<chan_width_distr>
<io width="1.000000"/>
<x distr="uniform" peak="1.000000"/>
<y distr="uniform" peak="1.000000"/>
</chan_width_distr>
<switch_block type="wilton" fs="3"/>
</device>
<cblocks>
<switch type="mux" name="cb_mux" R="0" Cin="596e-18" Cout="0" Tdel="77.93e-12" mux_trans_size="3" buf_size="63" circuit_model_name="mux_tree_like_tapbuf" structure="tree-like" num_level="2">
</switch>
</cblocks>
<switchlist>
<!-- VB: the mux_trans_size and buf_size data below is in minimum width transistor *areas*, assuming the purple
book area formula. This means the mux transistors are about 5x minimum drive strength.
We assume the first stage of the buffer is 3x min drive strength to be reasonable given the large
mux transistors, and this gives a reasonable stage ratio of a bit over 5x to the second stage. We assume
the n and p transistors in the first stage are equal-sized to lower the buffer trip point, since it's fed
by a pass transistor mux. We can then reverse engineer the buffer second stage to hit the specified
buf_size (really buffer area) - 16.2x minimum drive nmos and 1.8*16.2 = 29.2x minimum drive pmos.
I then took the data from Jeff G.'s PTM modeling of 45 nm to get the Cin (gate of first stage) and Cout
(diff of second stage) listed below. Jeff's models are in tech/ptm_45nm, and are in min feature multiples.
The minimum contactable transistor is 2.5 * 45 nm, so I need to multiply the drive strength sizes above by
2.5x when looking up in Jeff's tables.
Finally, we choose a switch delay (58 ps) that leads to length 4 wires having a delay equal to that of SIV of 126 ps.
This also leads to the switch being 46% of the total wire delay, which is reasonable. -->
<switch type="mux" name="sb_mux_L4" R="105" Cin="596e-18" Cout="0e-15" Tdel="47.2e-12" mux_trans_size="3" buf_size="63" circuit_model_name="mux_tree_like_tapbuf" structure="tree-like" num_level="1">
</switch>
<switch type="mux" name="sb_mux_L2" R="115" Cin="596e-18" Cout="0e-15" Tdel="47.2e-12" mux_trans_size="3" buf_size="63" circuit_model_name="mux_tree_like_tapbuf" structure="tree-like" num_level="1">
</switch>
<switch type="mux" name="sb_mux_L1" R="128" Cin="596e-18" Cout="0e-15" Tdel="47.2e-12" mux_trans_size="3" buf_size="63" circuit_model_name="mux_tree_like_tapbuf" structure="tree-like" num_level="1">
</switch>
</switchlist>
<segmentlist>
<!--- VB & JL: using ITRS metal stack data, 96 nm half pitch wires, which are intermediate metal width/space.
With the 96 nm half pitch, such wires would take 60 um of height, vs. a 90 um high (approximated as square) Stratix IV tile so this seems
reasonable. Using a tile length of 90 um, corresponding to the length of a Stratix IV tile if it were square. -->
<segment freq="0.4" length="4" type="unidir" Rmetal="101" Cmetal="22.5e-15" circuit_model_name="chan_segment">
<mux name="sb_mux_L4"/>
<sb type="pattern">1 1 1 1 1</sb>
<cb type="pattern">1 1 1 1</cb>
</segment>
<segment freq="0.3" length="2" type="unidir" Rmetal="101" Cmetal="22.5e-15" circuit_model_name="chan_segment">
<mux name="sb_mux_L4"/>
<sb type="pattern">1 1 1</sb>
<cb type="pattern">1 1 </cb>
</segment>
<segment freq="0.3" length="1" type="unidir" Rmetal="101" Cmetal="22.5e-15" circuit_model_name="chan_segment">
<mux name="sb_mux_L4"/>
<sb type="pattern">1 1</sb>
<cb type="pattern">1</cb>
</segment>
</segmentlist>
<!--switch_segment_patterns>
<pattern type="unbuf_sb" seg_length="1" seg_type="unidir" pattern_length="2">
<unbuf_mux name="1"/>
<sb type ="pattern">0 1</sb>
</pattern>
</switch_segment_patterns-->
<complexblocklist>
<!-- Define I/O pads begin -->
<!-- Capacity is a unique property of I/Os, it is the maximum number of I/Os that can be placed at the same (X,Y) location on the FPGA -->
<!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
<pb_type name="io" capacity="7" area="0" idle_mode_name="inpad" physical_mode_name="io_phy">
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
<!-- physical design description -->
<mode name="io_phy" disabled_in_packing="true">
<pb_type name="iopad" blif_model=".subckt io" num_pb="1" circuit_model_name="iopad" mode_bits="1">
<input name="outpad" num_pins="1"/>
<output name="inpad" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="inpad" input="iopad.inpad" output="io.inpad">
<delay_constant max="0e-11" in_port="iopad.inpad" out_port="io.inpad"/>
</direct>
<direct name="outpad" input="io.outpad" output="iopad.outpad">
<delay_constant max="0e-11" in_port="io.outpad" out_port="iopad.outpad"/>
</direct>
</interconnect>
</mode>
<!-- IOs can operate as either inputs or outputs.
Delays below come from Ian Kuon. They are small, so they should be interpreted as
the delays to and from registers in the I/O (and generally I/Os are registered
today and that is when you timing analyze them).
-->
<mode name="inpad">
<pb_type name="inpad" blif_model=".input" num_pb="1" physical_pb_type_name="iopad" mode_bits="1">
<output name="inpad" num_pins="1" physical_mode_pin="inpad"/>
</pb_type>
<interconnect>
<direct name="inpad" input="inpad.inpad" output="io.inpad">
<delay_constant max="0e-11" in_port="inpad.inpad" out_port="io.inpad"/>
</direct>
</interconnect>
</mode>
<mode name="outpad">
<pb_type name="outpad" blif_model=".output" num_pb="1" physical_pb_type_name="iopad" mode_bits="0">
<input name="outpad" num_pins="1" physical_mode_pin="outpad"/>
</pb_type>
<interconnect>
<direct name="outpad" input="io.outpad" output="outpad.outpad">
<delay_constant max="0e-11" in_port="io.outpad" out_port="outpad.outpad"/>
</direct>
</interconnect>
</mode>
<!-- Every input pin is driven by 15% of the tracks in a channel, every output pin drives 10% of the tracks in a channel -->
<fc default_in_type="frac" default_in_val="0.15" default_out_type="frac" default_out_val="0.10"/>
<!-- IOs go on the periphery of the FPGA, for consistency,
make it physically equivalent on all sides so that only one definition of I/Os is needed.
If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
-->
<pinlocations pattern="custom">
<loc side="left">io.outpad io.inpad</loc>
<loc side="top">io.outpad io.inpad</loc>
<loc side="right">io.outpad io.inpad</loc>
<loc side="bottom">io.outpad io.inpad</loc>
</pinlocations>
<!-- Place I/Os on the sides of the FPGA -->
<gridlocations>
<loc type="perimeter" priority="10"/>
</gridlocations>
<power method="ignore"/>
</pb_type>
<!-- Define I/O pads ends -->
<!-- Define general purpose logic block (CLB) begin -->
<!--- Area calculation: Total Stratix IV tile area is about 8100 um^2; with a minimum width transistor
area of 60 L^2, this yields a tile area of 84375 MWTAs.
Routing at W=300 is 30481 MWTAs, leaving us with a total of 53894 MWTAs for logic block area.
This means that only 37% of our area is in the general routing, and 63% is inside the logic
block. Note that the crossbar / local interconnect is considered part of the logic block
area in this analysis. That is a lower proportion of routing area than most academics
assume, but note that the total routing area really includes the crossbar, which would push
routing area up significantly, we estimate into the ~70% range.
-->
<pb_type name="clb" area="53894">
<input name="I" num_pins="40" equivalent="true"/>
<input name="cin" num_pins="1"/>
<output name="O" num_pins="20" equivalent="false"/>
<output name="cout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Describe fracturable logic element.
Each fracturable logic element has a 6-LUT that can alternatively operate as two 5-LUTs with shared inputs.
The outputs of the fracturable logic element can be optionally registered
-->
<pb_type name="fle" num_pb="10" physical_mode_name="fle_phy" idle_mode_name="n2_lut5">
<input name="in" num_pins="6"/>
<input name="cin" num_pins="1"/>
<output name="out" num_pins="2"/>
<output name="cout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<mode name="fle_phy" disabled_in_packing="true">
<pb_type name="frac_logic" num_pb="1">
<input name="in" num_pins="6"/>
<input name="cin" num_pins="1"/>
<output name="out" num_pins="2"/>
<output name="cout" num_pins="1"/>
<pb_type name="frac_lut6" blif_model=".frac_lut6" mode_bits="11" num_pb="1" circuit_model_name="frac_lut6">
<input name="in" num_pins="6"/>
<output name="lut4_out" num_pins="4"/>
<output name="lut5_out" num_pins="2"/>
<output name="lut6_out" num_pins="1"/>
</pb_type>
<pb_type name="adder_phy" blif_model=".subckt adder" num_pb="2" circuit_model_name="adder_1bit">
<input name="a" num_pins="1"/>
<input name="b" num_pins="1"/>
<input name="cin" num_pins="1"/>
<output name="cout" num_pins="1"/>
<output name="sumout" num_pins="1"/>
</pb_type>
<interconnect>
<direct name="direct_fraclut_in" input="frac_logic.in[5:0]" output="frac_lut6.in[5:0]"/>
<direct name="direct_cin" input="frac_logic.cin" output="adder_phy[0].cin"/>
<direct name="direct_carry" input="adder_phy[0].cout" output="adder_phy[1].cin"/>
<direct name="direct_cout" input="adder_phy[1].cout" output="frac_logic.cout"/>
<direct name="direct_lut4carry0" input="frac_lut6.lut4_out[0]" output="adder_phy[0].a"/>
<direct name="direct_lut4carry1" input="frac_lut6.lut4_out[1]" output="adder_phy[0].b"/>
<direct name="direct_lut4carry2" input="frac_lut6.lut4_out[2]" output="adder_phy[1].a"/>
<direct name="direct_lut4carry3" input="frac_lut6.lut4_out[3]" output="adder_phy[1].b"/>
<mux name="mux1" input="adder_phy[0].sumout frac_lut6.lut5_out[0]" output="frac_logic.out[0]">
<mode_select mode_name="n2_lut5.arithmetic" in_port="adder_phy[0].sumout" out_port="frac_logic.out[0]"/>
<mode_select mode_name="n2_lut5.blut5" in_port="frac_lut6.lut5_out[0]" out_port="frac_logic.out[0]"/>
<mode_select mode_name="n1_lut6" in_port="frac_lut6.lut5_out[0]" out_port="frac_logic.out[0]"/>
</mux>
<mux name="mux2" input="adder_phy[1].sumout frac_lut6.lut5_out[1] frac_lut6.lut6_out[0]" output="frac_logic.out[1]">
<mode_select mode_name="n2_lut5.arithmetic" in_port="adder_phy[1].sumout" out_port="frac_logic.out[1]"/>
<mode_select mode_name="n2_lut5.blut5" in_port="frac_lut6.lut5_out[1]" out_port="frac_logic.out[1]"/>
<mode_select mode_name="n1_lut6" in_port="frac_lut6.lut6_out[0]" out_port="frac_logic.out[1]"/>
</mux>
</interconnect>
</pb_type>
<pb_type name="ff_phy" blif_model=".latch" num_pb="2" class="flipflop" circuit_model_name="static_dff">
<input name="D" num_pins="1" port_class="D"/>
<output name="Q" num_pins="1" port_class="Q"/>
<clock name="clk" num_pins="1" port_class="clock"/>
<T_setup value="66e-12" port="ff_phy.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff_phy.Q" clock="clk"/>
</pb_type>
<interconnect>
<complete name="direct_clk" input="fle.clk" output="ff_phy[1:0].clk"/>
<direct name="direct_in" input="fle.in[5:0]" output="frac_logic.in[5:0]"/>
<direct name="direct_cin" input="fle.cin" output="frac_logic.cin"/>
<direct name="direct_cout" input="frac_logic.cout" output="fle.cout"/>
<direct name="direct_frac_out1" input="frac_logic.out[0]" output="ff_phy[0].D"/>
<direct name="direct_frac_out2" input="frac_logic.out[1]" output="ff_phy[1].D"/>
<mux name="mux1" input="ff_phy[0].Q frac_logic.out[0]" output="fle.out[0]">
</mux>
<mux name="mux2" input="ff_phy[1].Q frac_logic.out[1]" output="fle.out[1]">
</mux>
</interconnect>
</mode>
<mode name="n2_lut5" disabled_in_packing="false">
<!-- multi-mode support -->
<pb_type name="lut5inter" num_pb="1">
<input name="in" num_pins="5"/>
<input name="cin" num_pins="1"/>
<output name="out" num_pins="2"/>
<output name="cout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<pb_type name="ble5" num_pb="2" idle_mode_name="blut5">
<input name="in" num_pins="5"/>
<input name="cin" num_pins="1"/>
<output name="out" num_pins="1"/>
<output name="cout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<mode name="blut5">
<pb_type name="flut5" num_pb="1">
<input name="in" num_pins="5"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Regular LUT mode -->
<pb_type name="lut5" blif_model=".names" num_pb="1" class="lut" mode_bits="01" physical_pb_type_name="frac_lut6" physical_pb_type_index_factor="0.5">
<input name="in" num_pins="5" port_class="lut_in" physical_mode_pin="in[5:0]"/>
<output name="out" num_pins="1" port_class="lut_out" physical_mode_pin="lut5_out" physical_mode_pin_rotate_offset="1"/>
<!-- LUT timing using delay matrix -->
<!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
we instead take the average of these numbers to get more stable results
82e-12
173e-12
261e-12
263e-12
398e-12
-->
<delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
235e-12
235e-12
235e-12
235e-12
235e-12
</delay_matrix>
</pb_type>
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop" physical_pb_type_name="ff_phy">
<input name="D" num_pins="1" port_class="D" physical_mode_pin="D"/>
<output name="Q" num_pins="1" port_class="Q" physical_mode_pin="Q"/>
<clock name="clk" num_pins="1" port_class="clock" physical_mode_pin="clk"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="flut5.in" output="lut5.in"/>
<direct name="direct2" input="lut5.out" output="ff.D">
<pack_pattern name="ble5" in_port="lut5.out" out_port="ff.D"/>
</direct>
<direct name="direct3" input="flut5.clk" output="ff.clk"/>
<mux name="mux1" input="ff.Q lut5.out" output="flut5.out" spice_model_sram_offset="0">
<delay_constant max="25e-12" in_port="lut5.out" out_port="flut5.out" />
<delay_constant max="45e-12" in_port="ff.Q" out_port="flut5.out" />
</mux>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="ble5.in" output="flut5.in"/>
<direct name="direct2" input="ble5.clk" output="flut5.clk"/>
<direct name="direct3" input="flut5.out" output="ble5.out"/>
</interconnect>
</mode>
<mode name="arithmetic">
<pb_type name="arithmetic" num_pb="1">
<input name="in" num_pins="4"/>
<input name="cin" num_pins="1"/>
<output name="out" num_pins="1"/>
<output name="cout" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<!-- Special dual-LUT mode that drives adder only -->
<pb_type name="lut4" blif_model=".names" num_pb="2" class="lut" mode_bits="11" physical_pb_type_name="frac_lut6" physical_pb_type_index_factor="0.25">
<input name="in" num_pins="4" port_class="lut_in" physical_mode_pin="in[4:0]"/>
<output name="out" num_pins="1" port_class="lut_out" physical_mode_pin="lut4_out" physical_mode_pin_rotate_offset="1"/>
<!-- LUT timing using delay matrix -->
<!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
we instead take the average of these numbers to get more stable results
82e-12
173e-12
261e-12
263e-12
-->
<delay_matrix type="max" in_port="lut4.in" out_port="lut4.out">
195e-12
195e-12
195e-12
195e-12
</delay_matrix>
</pb_type>
<pb_type name="adder" blif_model=".subckt adder" num_pb="1" physical_pb_type_name="adder_phy">
<input name="a" num_pins="1" physical_mode_pin="a"/>
<input name="b" num_pins="1" physical_mode_pin="b"/>
<input name="cin" num_pins="1" physical_mode_pin="cin"/>
<output name="cout" num_pins="1" physical_mode_pin="cout"/>
<output name="sumout" num_pins="1" physical_mode_pin="sumout"/>
<delay_constant max="0.3e-9" in_port="adder.a" out_port="adder.sumout"/>
<delay_constant max="0.3e-9" in_port="adder.b" out_port="adder.sumout"/>
<delay_constant max="0.3e-9" in_port="adder.cin" out_port="adder.sumout"/>
<delay_constant max="0.3e-9" in_port="adder.a" out_port="adder.cout"/>
<delay_constant max="0.3e-9" in_port="adder.b" out_port="adder.cout"/>
<delay_constant max="0.01e-9" in_port="adder.cin" out_port="adder.cout"/>
</pb_type>
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop" physical_pb_type_name="ff_phy">
<input name="D" num_pins="1" port_class="D" physical_mode_pin="D"/>
<output name="Q" num_pins="1" port_class="Q" physical_mode_pin="Q"/>
<clock name="clk" num_pins="1" port_class="clock" physical_mode_pin="clk"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="clock" input="arithmetic.clk" output="ff.clk"/>
<direct name="lut_in1" input="arithmetic.in[3:0]" output="lut4[0:0].in[3:0]"/>
<direct name="lut_in2" input="arithmetic.in[3:0]" output="lut4[1:1].in[3:0]"/>
<direct name="lut_to_add1" input="lut4[0:0].out" output="adder.a">
</direct>
<direct name="lut_to_add2" input="lut4[1:1].out" output="adder.b">
</direct>
<direct name="add_to_ff" input="adder.sumout" output="ff.D">
<pack_pattern name="chain" in_port="adder.sumout" out_port="ff.D"/>
</direct>
<direct name="carry_in" input="arithmetic.cin" output="adder.cin">
<pack_pattern name="chain" in_port="arithmetic.cin" out_port="adder.cin"/>
</direct>
<direct name="carry_out" input="adder.cout" output="arithmetic.cout">
<pack_pattern name="chain" in_port="adder.cout" out_port="arithmetic.cout"/>
</direct>
<mux name="sumout" input="ff.Q adder.sumout" output="arithmetic.out">
<delay_constant max="25e-12" in_port="adder.sumout" out_port="arithmetic.out"/>
<delay_constant max="45e-12" in_port="ff.Q" out_port="arithmetic.out" />
</mux>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="ble5.in[3:0]" output="arithmetic.in"/>
<direct name="carry_in" input="ble5.cin" output="arithmetic.cin">
<pack_pattern name="chain" in_port="ble5.cin" out_port="arithmetic.cin"/>
</direct>
<direct name="carry_out" input="arithmetic.cout" output="ble5.cout">
<pack_pattern name="chain" in_port="arithmetic.cout" out_port="ble5.cout"/>
</direct>
<direct name="direct2" input="ble5.clk" output="arithmetic.clk"/>
<direct name="direct3" input="arithmetic.out" output="ble5.out"/>
</interconnect>
</mode>
</pb_type>
<interconnect>
<direct name="direct1" input="lut5inter.in" output="ble5[0:0].in"/>
<direct name="direct2" input="lut5inter.in" output="ble5[1:1].in"/>
<direct name="direct3" input="ble5[1:0].out" output="lut5inter.out"/>
<direct name="carry_in" input="lut5inter.cin" output="ble5[0:0].cin">
<pack_pattern name="chain" in_port="lut5inter.cin" out_port="ble5[0:0].cin"/>
</direct>
<direct name="carry_out" input="ble5[1:1].cout" output="lut5inter.cout">
<pack_pattern name="chain" in_port="ble5[1:1].cout" out_port="lut5inter.cout"/>
</direct>
<!-- Carry link from the first ble5 to the second one. The pack_pattern ports name the same
two ends as this direct connection (cout of ble5[0] to cin of ble5[1]) so that this edge is
annotated as part of the "chain" pattern. -->
<direct name="carry_link" input="ble5[0:0].cout" output="ble5[1:1].cin">
<pack_pattern name="chain" in_port="ble5[0:0].cout" out_port="ble5[1:1].cin"/>
</direct>
<complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="fle.in[4:0]" output="lut5inter.in"/>
<direct name="direct2" input="lut5inter.out" output="fle.out"/>
<direct name="direct3" input="fle.clk" output="lut5inter.clk"/>
<direct name="carry_in" input="fle.cin" output="lut5inter.cin">
<pack_pattern name="chain" in_port="fle.cin" out_port="lut5inter.cin"/>
</direct>
<direct name="carry_out" input="lut5inter.cout" output="fle.cout">
<pack_pattern name="chain" in_port="lut5inter.cout" out_port="fle.cout"/>
</direct>
</interconnect>
</mode> <!-- n2_lut5 -->
<mode name="n1_lut6">
<pb_type name="ble6" num_pb="1">
<input name="in" num_pins="6"/>
<output name="out" num_pins="1"/>
<clock name="clk" num_pins="1"/>
<pb_type name="lut6" blif_model=".names" num_pb="1" class="lut" mode_bits="00" physical_pb_type_name="frac_lut6" spice_model_sram_offset="0">
<input name="in" num_pins="6" port_class="lut_in" physical_mode_pin="in[5:0]"/>
<output name="out" num_pins="1" port_class="lut_out" physical_mode_pin="lut6_out[0]"/>
<!-- LUT timing using delay matrix -->
<!-- These are the physical delay inputs on a Stratix IV LUT but because VPR cannot do LUT rebalancing,
we instead take the average of these numbers to get more stable results
82e-12
173e-12
261e-12
263e-12
398e-12
397e-12
-->
<delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
261e-12
261e-12
261e-12
261e-12
261e-12
261e-12
</delay_matrix>
</pb_type>
<pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop" physical_pb_type_name="ff_phy" physical_pb_type_index_factor="2" physical_pb_type_index_offset="1">
<input name="D" num_pins="1" port_class="D" physical_mode_pin="D"/>
<output name="Q" num_pins="1" port_class="Q" physical_mode_pin="Q"/>
<clock name="clk" num_pins="1" port_class="clock" physical_mode_pin="clk"/>
<T_setup value="66e-12" port="ff.D" clock="clk"/>
<T_clock_to_Q max="124e-12" port="ff.Q" clock="clk"/>
</pb_type>
<interconnect>
<direct name="direct1" input="ble6.in" output="lut6[0:0].in"/>
<direct name="direct2" input="lut6.out" output="ff.D">
<pack_pattern name="ble6" in_port="lut6.out" out_port="ff.D"/>
</direct>
<direct name="direct3" input="ble6.clk" output="ff.clk"/>
<mux name="mux1" input="ff.Q lut6.out" output="ble6.out">
<delay_constant max="25e-12" in_port="lut6.out" out_port="ble6.out" />
<delay_constant max="45e-12" in_port="ff.Q" out_port="ble6.out" />
</mux>
</interconnect>
</pb_type>
<interconnect>
<direct name="direct1" input="fle.in" output="ble6.in"/>
<direct name="direct2" input="ble6.out" output="fle.out[1:1]"/>
<direct name="direct3" input="fle.clk" output="ble6.clk"/>
</interconnect>
</mode> <!-- n1_lut6 -->
</pb_type>
<interconnect>
<!-- We use a full crossbar to get logical equivalence at inputs of CLB
The delays below come from Stratix IV. the delay through a connection block
input mux + the crossbar in Stratix IV is 167 ps. We already have a 72 ps
delay on the connection block input mux (modeled by Ian Kuon), so the remaining
delay within the crossbar is 95 ps.
The delays of cluster feedbacks in Stratix IV is 100 ps, when driven by a LUT.
Since all our LUT outputs go to a BLE output, and have a delay of
25 ps to do so, we subtract 25 ps from the 100 ps delay of a feedback
to get the part that should be marked on the crossbar. -->
<complete name="crossbar" input="clb.I fle[9:0].out" output="fle[9:0].in" circuit_model_name="mux_tree_like">
<delay_constant max="95e-12" in_port="clb.I" out_port="fle[9:0].in" />
<delay_constant max="75e-12" in_port="fle[9:0].out" out_port="fle[9:0].in" />
</complete>
<complete name="clks" input="clb.clk" output="fle[9:0].clk">
</complete>
<!-- This way of specifying direct connection to clb outputs is important because this architecture uses automatic spreading of opins.
By grouping the output pins in this fashion, if a logic block is completely filled by 6-LUTs,
then the outputs those 6-LUTs take get evenly distributed across all four sides of the CLB instead of clumped on two sides (which is what happens with a more
naive specification).
-->
<direct name="clbouts1" input="fle[9:0].out[0:0]" output="clb.O[9:0]"/>
<direct name="clbouts2" input="fle[9:0].out[1:1]" output="clb.O[19:10]"/>
<!-- Carry chain links -->
<direct name="carry_in" input="clb.cin" output="fle[0:0].cin">
<!-- Put all inter-block carry chain delay on this one edge -->
<delay_constant max="0.16e-9" in_port="clb.cin" out_port="fle[0:0].cin"/>
<pack_pattern name="chain" in_port="clb.cin" out_port="fle[0:0].cin"/>
</direct>
<direct name="carry_out" input="fle[9:9].cout" output="clb.cout">
<pack_pattern name="chain" in_port="fle[9:9].cout" out_port="clb.cout"/>
</direct>
<direct name="carry_link" input="fle[8:0].cout" output="fle[9:1].cin">
<pack_pattern name="chain" in_port="fle[8:0].cout" out_port="fle[9:1].cin"/>
</direct>
</interconnect>
<fc default_in_type="frac" default_in_val="0.15" default_out_type="frac" default_out_val="0.10">
<pin name="cin" fc_type="frac" fc_val="0"/>
<pin name="cout" fc_type="frac" fc_val="0"/>
</fc>
<pinlocations pattern="spread"/>
<gridlocations>
<loc type="fill" priority="1"/>
</gridlocations>
</pb_type>
<!-- Define general purpose logic block (CLB) ends -->
</complexblocklist>
<power>
<local_interconnect C_wire="2.5e-10"/>
<mux_transistor_size mux_transistor_size="3"/>
<FF_size FF_size="4"/>
<LUT_transistor_size LUT_transistor_size="4"/>
</power>
<clocks>
<clock buffer_size="auto" C_wire="2.5e-10"/>
</clocks>
</architecture>

File diff suppressed because it is too large Load Diff

View File

@ -57,6 +57,8 @@ struct s_TokenPair OptionBaseTokenList[] = {
{ "power_output_file", OT_POWER_OUT_FILE }, /* Output file for power results */
{ "power", OT_POWER }, /* Run power estimation? */
{ "tech_properties", OT_CMOS_TECH_BEHAVIOR_FILE }, /* Technology properties */
/* Xifan Tang: Tileable routing support !!! */
{ "use_tileable_route_chan_width", OT_USE_TILEABLE_ROUTE_CHAN_WIDTH}, /* Enable adaption to tileable route chan_width */
/* General FPGA_X2P: FPGA-SPICE/Verilog/Bitstream Options */
{ "fpga_x2p_rename_illegal_port", OT_FPGA_X2P_RENAME_ILLEGAL_PORT }, /* Xifan TANG: rename illegal port names */
{ "fpga_x2p_signal_density_weight", OT_FPGA_X2P_SIGNAL_DENSITY_WEIGHT }, /* The weight of signal density */

View File

@ -74,6 +74,8 @@ enum e_OptionBaseToken {
OT_ACTIVITY_FILE,
OT_POWER_OUT_FILE,
OT_CMOS_TECH_BEHAVIOR_FILE,
/* Xifan Tang: Tileable routing support !!! */
OT_USE_TILEABLE_ROUTE_CHAN_WIDTH,
/* General FPGA_X2P: FPGA-SPICE/Verilog/Bitstream Options */
OT_FPGA_X2P_RENAME_ILLEGAL_PORT,
OT_FPGA_X2P_SIGNAL_DENSITY_WEIGHT, /* The weight of signal density in determining number of clock cycles in simulation */

View File

@ -474,6 +474,10 @@ ProcessOption(INP char **Args, INOUTP t_options * Options) {
case OT_CMOS_TECH_BEHAVIOR_FILE:
return ReadString(Args, &Options->CmosTechFile);
/* Xifan Tang: Tileable routing support !!! */
case OT_USE_TILEABLE_ROUTE_CHAN_WIDTH:
return Args;
/* Xifan Tang: FPGA X2P Options*/
case OT_FPGA_X2P_RENAME_ILLEGAL_PORT:
return Args;

View File

@ -536,9 +536,12 @@ static void SetupRoutingArch(INP t_arch Arch,
OUTP struct s_det_routing_arch *RoutingArch) {
RoutingArch->switch_block_type = Arch.SBType;
RoutingArch->switch_block_sub_type = Arch.SBSubType;
RoutingArch->R_minW_nmos = Arch.R_minW_nmos;
RoutingArch->R_minW_pmos = Arch.R_minW_pmos;
RoutingArch->Fs = Arch.Fs;
RoutingArch->sub_Fs = Arch.SubFs;
RoutingArch->wire_opposite_side = Arch.wire_opposite_side;
RoutingArch->directionality = BI_DIRECTIONAL;
if (Arch.Segments)
RoutingArch->directionality = Arch.Segments[0].directionality;
@ -624,6 +627,13 @@ static void SetupRouterOpts(INP t_options Options, INP boolean TimingEnabled,
if (Options.Count[OT_SHOW_PASS_TRANS]) {
is_show_pass_trans = TRUE;
}
/* END */
/* Xifan Tang: Tileable routing support !!! */
RouterOpts->use_tileable_route_chan_width = FALSE;
if (Options.Count[OT_USE_TILEABLE_ROUTE_CHAN_WIDTH]) {
RouterOpts->use_tileable_route_chan_width = TRUE;
}
/* END */
/* Depends on RouterOpts->router_algorithm */

View File

@ -207,6 +207,7 @@ static void ShowRouterOpts(INP struct s_router_opts RouterOpts) {
} else {
vpr_printf(TIO_MESSAGE_INFO, "%d\n", RouterOpts.fixed_channel_width);
}
/* Report whether channel widths are adapted to be tileable. The newline lives in the format string only, so no blank line follows the value. */
vpr_printf(TIO_MESSAGE_INFO, "RouterOpts.use_tileable_route_chan_width: %s\n", RouterOpts.use_tileable_route_chan_width ? "TRUE" : "FALSE");
vpr_printf(TIO_MESSAGE_INFO, "RouterOpts.acc_fac: %f\n", RouterOpts.acc_fac);
vpr_printf(TIO_MESSAGE_INFO, "RouterOpts.bb_factor: %d\n", RouterOpts.bb_factor);
@ -260,6 +261,7 @@ static void ShowRouterOpts(INP struct s_router_opts RouterOpts) {
} else {
vpr_printf(TIO_MESSAGE_INFO, "%d\n", RouterOpts.fixed_channel_width);
}
/* Report whether channel widths are adapted to be tileable. The %s conversion is required to consume the TRUE/FALSE argument; the argument strings already carry the trailing newline. */
vpr_printf(TIO_MESSAGE_INFO, "RouterOpts.use_tileable_route_chan_width: %s", RouterOpts.use_tileable_route_chan_width ? "TRUE\n" : "FALSE\n");
vpr_printf(TIO_MESSAGE_INFO, "RouterOpts.acc_fac: %f\n", RouterOpts.acc_fac);
vpr_printf(TIO_MESSAGE_INFO, "RouterOpts.bb_factor: %d\n", RouterOpts.bb_factor);

View File

@ -27,6 +27,8 @@
/* CLB PIN REMAP */
#include "place_clb_pin_remap.h"
#include "tileable_chan_details_builder.h"
/******************* Subroutines local to this module ************************/
static int binary_search_place_and_route(struct s_placer_opts placer_opts,
@ -151,6 +153,21 @@ void place_and_route(enum e_operation operation,
}
/* Other constraints can be left to rr_graph to check since this is one pass routing */
/* Xifan Tang: W estimation for tileable routing architecture */
/* Build the segment inf vector */
std::vector<t_segment_inf> segment_vec;
for (int iseg = 0; iseg < det_routing_arch.num_segment; ++iseg) {
segment_vec.push_back(segment_inf[iseg]);
}
if (TRUE == router_opts.use_tileable_route_chan_width) {
int adapted_W = adapt_to_tileable_route_chan_width(width_fac, segment_vec);
vpr_printf(TIO_MESSAGE_INFO,
"Adapt routing channel width (%d) to be tileable: %d\n",
width_fac, adapted_W);
width_fac = adapted_W;
}
/* Allocate the major routing structures. */
clb_opins_used_locally = alloc_route_structs();
@ -326,14 +343,22 @@ static int binary_search_place_and_route(struct s_placer_opts placer_opts,
udsd_multiplier = 2;
/* UDSD by AY End */
if (router_opts.fixed_channel_width != NO_FIXED_CHANNEL_WIDTH) {
current = router_opts.fixed_channel_width + 5 * udsd_multiplier;
low = router_opts.fixed_channel_width - 1 * udsd_multiplier;
} else {
current = max_pins_per_clb + max_pins_per_clb % 2; /* Binary search part */
/* End */
low = -1;
}
/* Build the segment inf vector */
std::vector<t_segment_inf> segment_vec;
for (int iseg = 0; iseg < det_routing_arch.num_segment; ++iseg) {
segment_vec.push_back(segment_inf[iseg]);
}
/* Constraints must be checked to not break rr_graph generator */
if (det_routing_arch.directionality == UNI_DIRECTIONAL) {
if (current % 2 != 0) {
@ -356,6 +381,21 @@ static int binary_search_place_and_route(struct s_placer_opts placer_opts,
attempt_count = 0;
while (final == -1) {
/* Xifan Tang: W estimation for tileable routing architecture */
if (TRUE == router_opts.use_tileable_route_chan_width) {
int adapted_W = adapt_to_tileable_route_chan_width(current, segment_vec);
vpr_printf(TIO_MESSAGE_INFO,
"Adapt routing channel width (%d) to be tileable: %d\n",
current, adapted_W);
current = adapted_W;
}
/* Do an early exit when current equals high or low.
* This means that the current W has been tried already. We just return a final value (high)
*/
if ( (current == high) || (current == low) ) {
final = high;
break;
}
vpr_printf(TIO_MESSAGE_INFO, "Using low: %d, high: %d, current: %d\n", low, high, current);
fflush(stdout);
@ -442,6 +482,7 @@ static int binary_search_place_and_route(struct s_placer_opts placer_opts,
if (low != -1) {
current = (high + low) / 2;
} else {
current = high / 2; /* haven't found lower bound yet */
}
@ -457,6 +498,7 @@ static int binary_search_place_and_route(struct s_placer_opts placer_opts,
final = high;
current = (high + low) / 2;
} else {
if (router_opts.fixed_channel_width != NO_FIXED_CHANNEL_WIDTH) {
/* FOR Wneed = f(Fs) search */
@ -468,6 +510,7 @@ static int binary_search_place_and_route(struct s_placer_opts placer_opts,
}
} else {
current = low * 2; /* Haven't found upper bound yet */
}
}
}
@ -560,8 +603,11 @@ static int binary_search_place_and_route(struct s_placer_opts placer_opts,
free_rr_graph();
build_rr_graph(graph_type, num_types, type_descriptors, nx, ny, grid,
chan_width_x[0], NULL, det_routing_arch.switch_block_type,
det_routing_arch.Fs, det_routing_arch.num_segment,
chan_width_x[0], NULL,
det_routing_arch.switch_block_type, det_routing_arch.Fs,
det_routing_arch.switch_block_sub_type, det_routing_arch.sub_Fs,
det_routing_arch.wire_opposite_side,
det_routing_arch.num_segment,
det_routing_arch.num_switch, segment_inf,
det_routing_arch.global_route_switch,
det_routing_arch.delayless_switch, timing_inf,

View File

@ -146,6 +146,9 @@ void vpr_print_usage(void) {
"\t[--acc_fac <float>] [--first_iter_pres_fac <float>]\n");
vpr_printf(TIO_MESSAGE_INFO,
"\t[--bend_cost <float>] [--route_type global | detailed]\n");
/* Xifan Tang: Tileable routing support !!! */
vpr_printf(TIO_MESSAGE_INFO,
"\t[--use_tileable_route_chan_width ]\n");
vpr_printf(TIO_MESSAGE_INFO,
"\t[--verify_binary_search] [--route_chan_width <int>]\n");
vpr_printf(TIO_MESSAGE_INFO,

View File

@ -759,6 +759,8 @@ struct s_router_opts {
boolean verify_binary_search;
boolean full_stats;
boolean doRouting;
/* Xifan Tang: option to enable adaption to tileable route channel width */
boolean use_tileable_route_chan_width;
};
/* All the parameters controlling the router's operation are in this *
@ -807,6 +809,9 @@ struct s_det_routing_arch {
enum e_directionality directionality; /* UDSD by AY */
int Fs;
enum e_switch_block_type switch_block_type;
int sub_Fs;
boolean wire_opposite_side;
enum e_switch_block_type switch_block_sub_type;
int num_segment;
short num_switch;
short global_route_switch;

View File

@ -1,2 +1,2 @@
rm tags
ctags -R shell_main.c main.c ./* ../../libarchfpga/include/*.[ch] ../../libarchfpga/fpga_spice_include/*.[ch] ../../libarchfpga/*.[ch] ../../pcre/SRC/*.[ch] ../../libarchfpga/SRC/include/*.[ch]
ctags -R shell_main.c main.c ./* ../../libarchfpga/SRC/include/*.[ch] ../../libarchfpga/SRC/fpga_spice_include/*.[ch] ../../libarchfpga/SRC/*.[ch] ../../pcre/SRC/*.[ch]

View File

@ -506,3 +506,182 @@ void print_rr_graph_stats(const t_rr_graph& rr_graph) {
return;
}
/************************************************************************
 * Print statistics of the global routing resource graph
 * (rr_node[0 .. num_rr_nodes - 1]):
 * 1. Number of nodes, broken down by node type
 * 2. Number of edges, broken down by the type of the node they leave from
 * 3. Count/max/min/average fan-in of CHANX and CHANY nodes
 *    (reported as Switch Block multiplexer sizes)
 * 4. Count/max/min/average fan-in of IPIN nodes
 *    (reported as Connection Block multiplexer sizes)
 * Averages are integer (truncated) values. When no node of a category
 * exists, all of its statistics are reported as 0.
 ************************************************************************/
void print_rr_graph_stats() {
  /* Constant strings: string literals must be bound to const char* in C++ */
  const char* type_str  = " Type ";
  const char* total_str = " Total ";
  const char* node_str  = " No. of nodes ";
  const char* edge_str  = " No. of edges ";

  /* Print number of nodes */
  vpr_printf(TIO_MESSAGE_INFO, "Statistics on number of RR nodes (by node type): \n");

  /* Count nodes and edges per node type in a single pass */
  std::vector<size_t> num_nodes_per_type(NUM_RR_TYPES, 0);
  std::vector<size_t> num_edges_per_type(NUM_RR_TYPES, 0);
  size_t num_edges = 0;
  for (int inode = 0; inode < num_rr_nodes; ++inode) {
    num_nodes_per_type[rr_node[inode].type]++;
    num_edges_per_type[rr_node[inode].type] += rr_node[inode].num_edges;
    num_edges += rr_node[inode].num_edges;
  }

  /* Get the largest string size of rr_node_typename */
  size_t max_str_typename = 0;
  for (int type = 0; type < NUM_RR_TYPES; ++type) {
    max_str_typename = std::max(max_str_typename, strlen(rr_node_typename[type]));
  }

  /* Count the number of characters per splitter line:
   * one column per node type (6 characters + the longest type name),
   * plus the two reserved strings "type" and "total"
   */
  size_t num_char_per_line = (size_t)NUM_RR_TYPES * (6 + max_str_typename);
  num_char_per_line += strlen(type_str);
  num_char_per_line += strlen(total_str);

  /* Print splitter */
  for (size_t ichar = 0; ichar < num_char_per_line; ++ichar) {
    vpr_printf(TIO_MESSAGE_INFO, "-");
  }
  vpr_printf(TIO_MESSAGE_INFO, "\n");

  /* Print node type */
  vpr_printf(TIO_MESSAGE_INFO, "%s", type_str);
  for (int type = 0; type < NUM_RR_TYPES; ++type) {
    vpr_printf(TIO_MESSAGE_INFO, " %s ", rr_node_typename[type]);
  }
  vpr_printf(TIO_MESSAGE_INFO, "%s", total_str);
  vpr_printf(TIO_MESSAGE_INFO, "\n");

  /* Print node numbers.
   * Arguments are cast to unsigned long so that they always match %lu
   */
  size_t total_num_nodes = 0;
  vpr_printf(TIO_MESSAGE_INFO, "%s", node_str);
  for (int type = 0; type < NUM_RR_TYPES; ++type) {
    vpr_printf(TIO_MESSAGE_INFO, " %10lu ", (unsigned long)num_nodes_per_type[type]);
    total_num_nodes += num_nodes_per_type[type];
  }
  vpr_printf(TIO_MESSAGE_INFO, " %10lu ", (unsigned long)num_rr_nodes);
  vpr_printf(TIO_MESSAGE_INFO, "\n");

  /* Check we have the same number as stated in rr_graph */
  assert (total_num_nodes == (size_t)num_rr_nodes);

  /* Print number of edges */
  vpr_printf(TIO_MESSAGE_INFO, "%s", edge_str);
  for (int type = 0; type < NUM_RR_TYPES; ++type) {
    vpr_printf(TIO_MESSAGE_INFO, " %10lu ", (unsigned long)num_edges_per_type[type]);
  }
  vpr_printf(TIO_MESSAGE_INFO, " %10lu ", (unsigned long)num_edges);
  vpr_printf(TIO_MESSAGE_INFO, "\n");

  /* Print splitter */
  for (size_t ichar = 0; ichar < num_char_per_line; ++ichar) {
    vpr_printf(TIO_MESSAGE_INFO, "-");
  }
  vpr_printf(TIO_MESSAGE_INFO, "\n");

  /* Collect MUX size distribution in one pass:
   * - fan-in of CHANX/CHANY nodes is reported as Switch Block (SB) mux size
   * - fan-in of IPIN nodes is reported as Connection Block (CB) mux size
   */
  int num_sb_mux = 0;
  short max_sb_mux_size = 0;
  short min_sb_mux_size = 0;
  size_t avg_sb_mux_size = 0; /* holds the sum until divided below */

  int num_cb_mux = 0;
  short max_cb_mux_size = 0;
  short min_cb_mux_size = 0;
  size_t avg_cb_mux_size = 0; /* holds the sum until divided below */

  for (int inode = 0; inode < num_rr_nodes; ++inode) {
    const short fan_in = rr_node[inode].fan_in;
    if ( (CHANX == rr_node[inode].type)
      || (CHANY == rr_node[inode].type) ) {
      /* The first node found seeds the minimum; the maximum starts from 0 */
      min_sb_mux_size = (0 == num_sb_mux) ? fan_in : std::min(fan_in, min_sb_mux_size);
      max_sb_mux_size = std::max(fan_in, max_sb_mux_size);
      avg_sb_mux_size += fan_in;
      num_sb_mux++;
    } else if (IPIN == rr_node[inode].type) {
      min_cb_mux_size = (0 == num_cb_mux) ? fan_in : std::min(fan_in, min_cb_mux_size);
      max_cb_mux_size = std::max(fan_in, max_cb_mux_size);
      avg_cb_mux_size += fan_in;
      num_cb_mux++;
    }
  }
  /* The minimum can never exceed the maximum (which is clamped at 0 from below) */
  min_sb_mux_size = std::min(min_sb_mux_size, max_sb_mux_size);
  min_cb_mux_size = std::min(min_cb_mux_size, max_cb_mux_size);

  /* Turn sums into averages; avoid dividing by zero when a category is empty */
  if (0 < num_sb_mux) {
    avg_sb_mux_size /= num_sb_mux;
  }
  if (0 < num_cb_mux) {
    avg_cb_mux_size /= num_cb_mux;
  }

  /* Print Switch Block statistics */
  vpr_printf(TIO_MESSAGE_INFO, "------------------------------------------------\n");
  vpr_printf(TIO_MESSAGE_INFO, "Total No. of Switch Block Multiplexer size:%d\n", num_sb_mux);
  vpr_printf(TIO_MESSAGE_INFO, "Maximum Switch Block Multiplexer size:%d\n", max_sb_mux_size);
  vpr_printf(TIO_MESSAGE_INFO, "Minimum Switch Block Multiplexer size:%d\n", min_sb_mux_size);
  vpr_printf(TIO_MESSAGE_INFO, "Average Switch Block Multiplexer size:%lu\n", (unsigned long)avg_sb_mux_size);
  vpr_printf(TIO_MESSAGE_INFO, "------------------------------------------------\n");

  /* Print Connection Block statistics */
  vpr_printf(TIO_MESSAGE_INFO, "------------------------------------------------\n");
  vpr_printf(TIO_MESSAGE_INFO, "Total No. of Connection Block Multiplexer size:%d\n", num_cb_mux);
  vpr_printf(TIO_MESSAGE_INFO, "Maximum Connection Block Multiplexer size:%d\n", max_cb_mux_size);
  vpr_printf(TIO_MESSAGE_INFO, "Minimum Connection Block Multiplexer size:%d\n", min_cb_mux_size);
  vpr_printf(TIO_MESSAGE_INFO, "Average Connection Block Multiplexer size:%lu\n", (unsigned long)avg_cb_mux_size);
  vpr_printf(TIO_MESSAGE_INFO, "------------------------------------------------\n");

  return;
}
/************************************************************************
* End of file : rr_graph_builder_utils.cpp
***********************************************************************/

View File

@ -43,5 +43,7 @@ short get_track_rr_node_end_track_id(const t_rr_node* track_rr_node);
void print_rr_graph_stats(const t_rr_graph& rr_graph);
void print_rr_graph_stats();
#endif

View File

@ -118,6 +118,25 @@ std::vector<size_t> get_num_tracks_per_seg_type(const size_t chan_width,
return result;
}
/************************************************************************
* Adapt the number of channel width to a tileable routing architecture
***********************************************************************/
int adapt_to_tileable_route_chan_width(int chan_width,
std::vector<t_segment_inf> segment_infs) {
int tileable_chan_width = 0;
/* Estimate the number of segments per type by the given ChanW*/
std::vector<size_t> num_tracks_per_seg_type = get_num_tracks_per_seg_type(chan_width,
segment_infs,
true); /* Force to use the full segment group */
/* Sum-up the number of tracks */
for (size_t iseg = 0; iseg < num_tracks_per_seg_type.size(); ++iseg) {
tileable_chan_width += num_tracks_per_seg_type[iseg];
}
return tileable_chan_width;
}
/************************************************************************
* Build details of routing tracks in a channel
* The function will

View File

@ -4,6 +4,8 @@
#include "vpr_types.h"
#include "chan_node_details.h"
int adapt_to_tileable_route_chan_width(int chan_width, std::vector<t_segment_inf> segment_inf);
ChanNodeDetails build_unidir_chan_node_details(const size_t chan_width, const size_t max_seg_length,
const enum e_side device_side,
const std::vector<t_segment_inf> segment_inf);

View File

@ -31,6 +31,8 @@
* +-------------------------------------+
* | 2019/06/11 | Xifan Tang | Created
* +-------------------------------------+
* | 2019/07/02 | Xifan Tang | Modified to support SB subtype and SubFs
* +-------------------------------------+
***********************************************************************/
/************************************************************************
* This file contains a builder for the complex rr_graph data structure
@ -777,7 +779,9 @@ void build_rr_graph_edges(t_rr_graph* rr_graph,
const std::vector<size_t> device_chan_width,
const std::vector<t_segment_inf> segment_inf,
int** Fc_in, int** Fc_out,
const enum e_switch_block_type sb_type, const int Fs) {
const enum e_switch_block_type sb_type, const int Fs,
const enum e_switch_block_type sb_subtype, const int subFs,
const bool wire_opposite_side) {
/* Create edges for SOURCE and SINK nodes for a tileable rr_graph */
build_rr_graph_edges_for_source_nodes(rr_graph, grids);
@ -804,7 +808,9 @@ void build_rr_graph_edges(t_rr_graph* rr_graph,
/* adapt the switch_block_conn for the GSB nodes */
t_track2track_map sb_conn; /* [0..from_gsb_side][0..chan_width-1][track_indices] */
sb_conn = build_gsb_track_to_track_map(rr_graph, rr_gsb, sb_type, Fs, segment_inf);
sb_conn = build_gsb_track_to_track_map(rr_graph, rr_gsb,
sb_type, Fs, sb_subtype, subFs, wire_opposite_side,
segment_inf);
/* Build edges for a GSB */
build_edges_for_one_tileable_rr_gsb(rr_graph, &rr_gsb,
@ -904,6 +910,8 @@ void build_tileable_unidir_rr_graph(INP const int L_num_types,
INP t_type_ptr types, INP const int L_nx, INP const int L_ny,
INP struct s_grid_tile **L_grid, INP const int chan_width,
INP const enum e_switch_block_type sb_type, INP const int Fs,
INP const enum e_switch_block_type sb_subtype, INP const int subFs,
INP const boolean wire_opposite_side,
INP const int num_seg_types,
INP const t_segment_inf * segment_inf,
INP const int num_switches, INP const int delayless_switch,
@ -1021,7 +1029,7 @@ void build_tileable_unidir_rr_graph(INP const int L_num_types,
/* Create edges for a tileable rr_graph */
build_rr_graph_edges(&rr_graph, device_size, grids, device_chan_width, segment_infs,
Fc_in, Fc_out,
sb_type, Fs);
sb_type, Fs, sb_subtype, subFs, (bool)wire_opposite_side);
/************************************************************************
* 6.2 Build direction connection lists
@ -1034,7 +1042,7 @@ void build_tileable_unidir_rr_graph(INP const int L_num_types,
build_rr_graph_direct_connections(&rr_graph, device_size, grids, delayless_switch,
num_directs, directs, clb_to_clb_directs);
print_rr_graph_stats(rr_graph);
//print_rr_graph_stats(rr_graph);
/* Clear driver switches of the rr_graph */
clear_rr_graph_driver_switch(&rr_graph);

View File

@ -5,10 +5,14 @@
#include "vpr_types.h"
int adapt_to_tileable_route_chan_width(int chanW, t_segment_inf* segment_inf);
void build_tileable_unidir_rr_graph(INP const int L_num_types,
INP t_type_ptr types, INP const int L_nx, INP const int L_ny,
INP struct s_grid_tile **L_grid, INP const int chan_width,
INP const enum e_switch_block_type sb_type, INP const int Fs,
INP const enum e_switch_block_type sb_subtype, INP const int subFs,
INP const boolean wire_opposite_side,
INP const int num_seg_types,
INP const t_segment_inf * segment_inf,
INP const int num_switches, INP const int delayless_switch,

View File

@ -357,6 +357,7 @@ void build_gsb_one_group_track_to_track_map(const t_rr_graph* rr_graph,
const RRGSB& rr_gsb,
const enum e_switch_block_type sb_type,
const int Fs,
const bool wire_opposite_side,
const t_track_group from_tracks, /* [0..gsb_side][track_indices] */
const t_track_group to_tracks, /* [0..gsb_side][track_indices] */
t_track2track_map* track2track_map) {
@ -387,7 +388,11 @@ void build_gsb_one_group_track_to_track_map(const t_rr_graph* rr_graph,
if (from_side == to_side) {
continue;
}
/* Bypass those from_side is opposite to to_side if required */
if ( (true == wire_opposite_side)
&& (to_side_manager.get_opposite() == from_side) ) {
continue;
}
/* Get other track_ids depending on the switch block pattern */
/* Find the track ids that will start at the other sides */
std::vector<size_t> to_track_ids = get_switch_block_to_track_id(sb_type, Fs, from_side, inode,
@ -475,6 +480,9 @@ t_track2track_map build_gsb_track_to_track_map(const t_rr_graph* rr_graph,
const RRGSB& rr_gsb,
const enum e_switch_block_type sb_type,
const int Fs,
const enum e_switch_block_type sb_subtype,
const int subFs,
const bool wire_opposite_side,
const std::vector<t_segment_inf> segment_inf) {
t_track2track_map track2track_map; /* [0..gsb_side][0..chan_width][track_indices] */
@ -547,7 +555,8 @@ t_track2track_map build_gsb_track_to_track_map(const t_rr_graph* rr_graph,
/* For Group 1: we build connections between end_tracks and start_tracks*/
build_gsb_one_group_track_to_track_map(rr_graph, rr_gsb,
sb_type, Fs,
end_tracks, start_tracks,
true, /* End tracks should always to wired to start tracks */
end_tracks, start_tracks,
&track2track_map);
/* For Group 2: we build connections between pass_tracks and start_tracks*/
@ -555,7 +564,8 @@ t_track2track_map build_gsb_track_to_track_map(const t_rr_graph* rr_graph,
* TODO: This can be improved with different patterns!
*/
build_gsb_one_group_track_to_track_map(rr_graph, rr_gsb,
sb_type, Fs,
sb_subtype, subFs,
wire_opposite_side, /* Pass tracks may not be wired to start tracks */
pass_tracks, start_tracks,
&track2track_map);
@ -1052,52 +1062,53 @@ void build_gsb_one_ipin_track2pin_map(const t_rr_graph* rr_graph,
assert (0 == actual_track_list.size() % 2);
/* Scale Fc */
int actual_Fc = Fc * actual_track_list.size() / chan_width;
int actual_Fc = std::ceil((float)Fc * (float)actual_track_list.size() / (float)chan_width);
/* Minimum Fc should be 2 : ensure we will connect to a pair of routing tracks */
actual_Fc = std::max(2, actual_Fc);
actual_Fc = std::max(1, actual_Fc);
/* Compute the step between two connection from this IPIN to tracks:
* step = W' / Fc', W' and Fc' are the adapted W and Fc from actual_track_list and Fc_in
*/
size_t track_step = actual_track_list.size() / actual_Fc;
/* Track step mush be a multiple of 2!!!*/
if (0 != track_step % 2) {
track_step--; /* minus 1 to increase connectivity */
}
*/
size_t track_step = std::floor((float)actual_track_list.size() / (float)actual_Fc);
/* Make sure step should be at least 2 */
track_step = std::max(2, (int)track_step);
track_step = std::max(1, (int)track_step);
/* Adapt offset to the range of actual_track_list */
size_t actual_offset = offset % actual_track_list.size();
/* rotate the track list by an offset */
std::rotate(actual_track_list.begin(), actual_track_list.begin() + actual_offset, actual_track_list.end());
if (0 < actual_offset) {
std::rotate(actual_track_list.begin(), actual_track_list.begin() + actual_offset, actual_track_list.end());
}
/* Assign tracks: since we assign 2 track per round, we increment itrack by 2* step */
int track_cnt = 0;
/* Keep assigning until we meet the Fc requirement */
for (size_t itrack = 0; itrack < actual_track_list.size(); itrack = itrack + 2 * track_step) {
/* Update pin2track map */
size_t chan_side_index = chan_side_manager.to_size_t();
size_t ipin_index = ipin_node - rr_graph->rr_node;
/* itrack may exceed the size of actual_track_list, adapt it */
size_t actual_itrack = itrack % actual_track_list.size();
/* track_index may exceed the chan_width(), adapt it */
size_t track_index = actual_track_list[itrack] % chan_width;
size_t track_index = actual_track_list[actual_itrack] % chan_width;
(*track2ipin_map)[chan_side_index][track_index].push_back(ipin_index);
/* track_index may exceed the chan_width(), adapt it */
track_index = (actual_track_list[itrack] + 1) % chan_width;
track_index = (actual_track_list[actual_itrack] + 1) % chan_width;
(*track2ipin_map)[chan_side_index][track_index].push_back(ipin_index);
track_cnt += 2;
/* Stop when we have enough Fc: this may lead to some tracks have zero drivers.
* So I comment it. And we just make sure its track_cnt >= actual_Fc
if (actual_Fc == track_cnt) {
break;
}
*/
}
/* Ensure the number of tracks is similar to Fc */
//printf("Fc_in=%d, track_cnt=%d\n", actual_Fc, track_cnt);
assert (actual_Fc <= track_cnt);
/* Give a warning if Fc is < track_cnt */
/*
if (actual_Fc != track_cnt) {
vpr_printf(TIO_MESSAGE_INFO,
"IPIN Node(%lu) will have a different Fc(=%lu) than specified(=%lu)!\n",
ipin_node - rr_graph->rr_node, track_cnt, actual_Fc);
}
*/
}
return;
@ -1153,13 +1164,13 @@ void build_gsb_one_opin_pin2track_map(const t_rr_graph* rr_graph,
}
/* Scale Fc */
int actual_Fc = Fc * actual_track_list.size() / chan_width;
int actual_Fc = std::ceil((float)Fc * (float)actual_track_list.size() / (float)chan_width);
/* Minimum Fc should be 1 : ensure we will drive 1 routing track */
actual_Fc = std::max(1, actual_Fc);
/* Compute the step between two connection from this IPIN to tracks:
* step = W' / Fc', W' and Fc' are the adapted W and Fc from actual_track_list and Fc_in
*/
size_t track_step = actual_track_list.size() / actual_Fc;
size_t track_step = std::floor((float)actual_track_list.size() / (float)actual_Fc);
/* Track step must be a multiple of 2!!!*/
/* Make sure step should be at least 1 */
track_step = std::max(1, (int)track_step);
@ -1174,10 +1185,13 @@ void build_gsb_one_opin_pin2track_map(const t_rr_graph* rr_graph,
/* Assign tracks */
int track_cnt = 0;
/* Keep assigning until we meet the Fc requirement */
for (size_t itrack = 0; itrack < actual_track_list.size(); itrack = itrack + track_step) {
/* Update pin2track map */
size_t opin_side_index = opin_side_manager.to_size_t();
size_t track_index = actual_track_list[itrack];
/* itrack may exceed the size of actual_track_list, adapt it */
size_t actual_itrack = itrack % actual_track_list.size();
size_t track_index = actual_track_list[actual_itrack];
size_t track_rr_node_index = rr_gsb.get_chan_node(chan_side, track_index) - rr_graph->rr_node;
(*opin2track_map)[opin_side_index][opin_node_id].push_back(track_rr_node_index);
/* update track counter */
@ -1191,8 +1205,14 @@ void build_gsb_one_opin_pin2track_map(const t_rr_graph* rr_graph,
}
/* Ensure the number of tracks is similar to Fc */
//printf("Fc_out=%lu, scaled_Fc_out=%d, track_cnt=%d, actual_track_cnt=%lu/%lu\n", Fc, actual_Fc, track_cnt, actual_track_list.size(), chan_width);
assert (actual_Fc <= track_cnt);
/* Give a warning if Fc is < track_cnt */
/*
if (actual_Fc != track_cnt) {
vpr_printf(TIO_MESSAGE_INFO,
"OPIN Node(%lu) will have a different Fc(=%lu) than specified(=%lu)!\n",
opin_node_id, track_cnt, actual_Fc);
}
*/
}
return;

View File

@ -23,6 +23,9 @@ t_track2track_map build_gsb_track_to_track_map(const t_rr_graph* rr_graph,
const RRGSB& rr_gsb,
const enum e_switch_block_type sb_type,
const int Fs,
const enum e_switch_block_type sb_subtype,
const int subFs,
const bool wire_opposite_side,
const std::vector<t_segment_inf> segment_inf);
RRGSB build_one_tileable_rr_gsb(const DeviceCoordinator& device_range,

View File

@ -1,3 +1,41 @@
/**********************************************************
* MIT License
*
* Copyright (c) 2018 LNIS - The University of Utah
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
***********************************************************************/
/************************************************************************
* Filename: fpga_x2p_bitstream_utils.c
* Created by: Xifan Tang
* Change history:
* +-------------------------------------+
* | Date | Author | Notes
* +-------------------------------------+
* | 2019/07/02 | Xifan Tang | Created
* +-------------------------------------+
***********************************************************************/
/************************************************************************
* This file contains most utilized functions for the bitstream generator
***********************************************************************/
/***********************************/
/* Synthesizable Verilog Dumping */
/* Xifan TANG, EPFL/LSI */
@ -30,11 +68,14 @@
#include "fpga_x2p_mux_utils.h"
#include "fpga_x2p_globals.h"
#include "fpga_x2p_bitstream_utils.h"
/* Determine the size of input address of a decoder */
int determine_decoder_size(int num_addr_out) {
return ceil(log(num_addr_out)/log(2.));
}
static
int count_num_sram_bits_one_lut_spice_model(t_spice_model* cur_spice_model) {
int num_sram_bits = 0;
int iport;
@ -98,6 +139,7 @@ int count_num_sram_bits_one_lut_spice_model(t_spice_model* cur_spice_model) {
return num_sram_bits;
}
static
int count_num_sram_bits_one_mux_spice_model(t_spice_model* cur_spice_model,
int mux_size) {
int num_sram_bits = 0;
@ -162,7 +204,7 @@ int count_num_sram_bits_one_mux_spice_model(t_spice_model* cur_spice_model,
return num_sram_bits;
}
static
int count_num_sram_bits_one_generic_spice_model(t_spice_model* cur_spice_model) {
int iport;
int num_sram_bits = 0;
@ -227,6 +269,7 @@ int count_num_sram_bits_one_spice_model(t_spice_model* cur_spice_model,
return -1;
}
static
int count_num_mode_bits_one_generic_spice_model(t_spice_model* cur_spice_model) {
int iport;
int num_mode_bits = 0;
@ -424,20 +467,7 @@ int count_num_reserved_conf_bits_one_mux_spice_model(t_spice_model* cur_spice_mo
break;
case SPICE_SRAM_SCAN_CHAIN:
case SPICE_SRAM_STANDALONE:
/* 4T1R MUX requires more configuration bits */
if (SPICE_MODEL_STRUCTURE_TREE == cur_spice_model->design_tech_info.mux_info->structure) {
/* For tree-structure: we need 3 times more config. bits */
num_reserved_conf_bits = 0;
} else if (SPICE_MODEL_STRUCTURE_MULTILEVEL == cur_spice_model->design_tech_info.mux_info->structure) {
/* For multi-level structure: we need 1 more config. bits for each level */
num_reserved_conf_bits = 0;
} else {
num_reserved_conf_bits = 0;
}
/* For 2:1 MUX, whatever structure, there is only one level */
if (2 == num_input_size) {
num_reserved_conf_bits = 0;
}
num_reserved_conf_bits = 0;
break;
default:
vpr_printf(TIO_MESSAGE_ERROR,"(FILE:%s,LINE[%d])Invalid type of SRAM organization!\n",
@ -533,7 +563,7 @@ int count_num_reserved_conf_bits_one_spice_model(t_spice_model* cur_spice_model,
return num_reserved_conf_bits;
}
static
int count_num_conf_bits_one_lut_spice_model(t_spice_model* cur_spice_model,
enum e_sram_orgz cur_sram_orgz_type) {
int num_conf_bits = 0;
@ -611,7 +641,7 @@ int count_num_conf_bits_one_lut_spice_model(t_spice_model* cur_spice_model,
return num_conf_bits;
}
static
int count_num_conf_bits_one_mux_spice_model(t_spice_model* cur_spice_model,
enum e_sram_orgz cur_sram_orgz_type,
int mux_size) {
@ -684,6 +714,7 @@ int count_num_conf_bits_one_mux_spice_model(t_spice_model* cur_spice_model,
return num_conf_bits;
}
static
int count_num_conf_bits_one_generic_spice_model(t_spice_model* cur_spice_model,
enum e_sram_orgz cur_sram_orgz_type) {
int num_conf_bits = 0;
@ -1098,9 +1129,9 @@ add_mux_conf_bits_to_llist(int mux_size,
}
/* Add SCFF configuration bits to a linked list*/
void
add_sram_scff_conf_bits_to_llist(t_sram_orgz_info* cur_sram_orgz_info,
int num_sram_bits, int* sram_bits) {
static
void add_sram_scff_conf_bits_to_llist(t_sram_orgz_info* cur_sram_orgz_info,
int num_sram_bits, int* sram_bits) {
int ibit, cur_mem_bit;
t_conf_bit** sram_bit = NULL;
t_spice_model* cur_sram_spice_model = NULL;
@ -1592,3 +1623,7 @@ void add_mux_conf_bits_to_sram_orgz_info(t_sram_orgz_info* cur_sram_orgz_info,
return;
}
/************************************************************************
* End of file : fpga_x2p_bitstream_utils.c
***********************************************************************/

View File

@ -1,3 +1,5 @@
#ifndef FPGA_X2P_BITSTREAM_UTILS_H
#define FPGA_X2P_BITSTREAM_UTILS_H
int determine_decoder_size(int num_addr_out);
@ -24,6 +26,9 @@ int count_num_reserved_conf_bits_one_spice_model(t_spice_model* cur_spice_model,
enum e_sram_orgz cur_sram_orgz_type,
int mux_size);
int count_num_reserved_conf_bit_one_interc(t_interconnect* cur_interc,
enum e_sram_orgz cur_sram_orgz_type);
void
add_mux_scff_conf_bits_to_llist(int mux_size,
t_sram_orgz_info* cur_sram_orgz_info,
@ -78,3 +83,5 @@ void add_sram_conf_bits_to_sram_orgz_info(t_sram_orgz_info* cur_sram_orgz_info,
void add_mux_conf_bits_to_sram_orgz_info(t_sram_orgz_info* cur_sram_orgz_info,
t_spice_model* mux_spice_model, int mux_size) ;
#endif

View File

@ -1,6 +1,8 @@
#ifndef FPGA_X2P_PBTYPES_UTILS_H
#define FPGA_X2P_PBTYPES_UTILS_H
#include "fpga_x2p_bitstream_utils.h"
void check_pb_graph_edge(t_pb_graph_edge pb_graph_edge);
void check_pb_graph_pin_edges(t_pb_graph_pin pb_graph_pin);
@ -74,9 +76,6 @@ void mark_one_pb_parasitic_nets(t_phy_pb* cur_pb);
int count_num_conf_bit_one_interc(t_interconnect* cur_interc,
enum e_sram_orgz cur_sram_orgz_type);
int count_num_reserved_conf_bit_one_interc(t_interconnect* cur_interc,
enum e_sram_orgz cur_sram_orgz_type);
int count_num_conf_bits_pb_type_mode_interc(t_mode* cur_pb_type_mode,
enum e_sram_orgz cur_sram_orgz_type);

View File

@ -51,8 +51,7 @@ void init_and_check_one_sram_inf_orgz(t_sram_inf_orgz* cur_sram_inf_orgz,
t_spice_model* spice_models);
static
void init_and_check_sram_inf(t_arch* arch,
t_det_routing_arch* routing_arch);
void init_and_check_sram_inf(t_arch* arch);
static
t_llist* check_and_add_one_global_port_to_llist(t_llist* old_head,
@ -378,8 +377,7 @@ void init_and_check_one_sram_inf_orgz(t_sram_inf_orgz* cur_sram_inf_orgz,
}
static
void init_and_check_sram_inf(t_arch* arch,
t_det_routing_arch* routing_arch) {
void init_and_check_sram_inf(t_arch* arch) {
/* We have two branches:
* 1. SPICE SRAM organization information
* 2. Verilog SRAM organization information
@ -511,7 +509,7 @@ void init_check_arch_spice_models(t_arch* arch,
}
/* Step C: Find SRAM Model*/
init_and_check_sram_inf(arch, routing_arch);
init_and_check_sram_inf(arch);
/* Step D: Find the segment spice_model*/
for (i = 0; i < arch->num_segments; i++) {
@ -689,7 +687,7 @@ void rec_identify_pb_type_phy_mode(t_pb_type* cur_pb_type) {
/* Identify physical mode of pb_types in each defined complex block */
static
void init_check_arch_pb_type_idle_and_phy_mode(t_arch* Arch) {
void init_check_arch_pb_type_idle_and_phy_mode() {
int itype;
for (itype = 0; itype < num_types; itype++) {
@ -1310,12 +1308,19 @@ void fpga_x2p_free(t_arch* Arch) {
/* Top-level function of FPGA-SPICE setup */
void fpga_x2p_setup(t_vpr_setup vpr_setup,
t_arch* Arch) {
/* Timer */
clock_t t_start;
clock_t t_end;
float run_time_sec;
int num_rename_violation = 0;
int num_clocks = 0;
float vpr_crit_path_delay = 0.;
float vpr_clock_freq = 0.;
float vpr_clock_period = 0.;
/* Start time count */
t_start = clock();
vpr_printf(TIO_MESSAGE_INFO, "\nFPGA-SPICE Tool suites Initilization begins...\n");
@ -1323,7 +1328,7 @@ void fpga_x2p_setup(t_vpr_setup vpr_setup,
init_check_arch_spice_models(Arch, &(vpr_setup.RoutingArch));
/* Initialize idle mode and physical mode of each pb_type and pb_graph_node */
init_check_arch_pb_type_idle_and_phy_mode(Arch);
init_check_arch_pb_type_idle_and_phy_mode();
/* Create and initialize a linked list for global ports */
global_ports_head = init_llist_global_ports(Arch->spice);
@ -1465,6 +1470,13 @@ void fpga_x2p_setup(t_vpr_setup vpr_setup,
spice_net_info_add_density_weight(vpr_setup.FPGA_SPICE_Opts.signal_density_weight);
}
/* End time count */
t_end = clock();
run_time_sec = (float)(t_end - t_start) / CLOCKS_PER_SEC;
vpr_printf(TIO_MESSAGE_INFO, "FPGA X2P setup took %g seconds\n", run_time_sec);
return;
}

View File

@ -1319,6 +1319,14 @@ DeviceRRGSB build_device_rr_gsb(boolean output_sb_xml, char* sb_xml_dir,
int LL_num_rr_nodes, t_rr_node* LL_rr_node,
t_ivec*** LL_rr_node_indices, int num_segments,
t_rr_indexed_data* LL_rr_indexed_data) {
/* Timer */
clock_t t_start;
clock_t t_end;
float run_time_sec;
/* Start time count */
t_start = clock();
/* Create an object */
DeviceRRGSB LL_device_rr_gsb;
@ -1394,6 +1402,12 @@ DeviceRRGSB build_device_rr_gsb(boolean output_sb_xml, char* sb_xml_dir,
}
}
/* End time count */
t_end = clock();
run_time_sec = (float)(t_end - t_start) / CLOCKS_PER_SEC;
vpr_printf(TIO_MESSAGE_INFO, "Routing architecture uniqifying took %g seconds\n", run_time_sec);
return LL_device_rr_gsb;
}

View File

@ -30,6 +30,8 @@
#include "fpga_x2p_bitstream_utils.h"
#include "fpga_bitstream_primitives.h"
#include "fpga_bitstream_pbtypes.h"
/***** Subroutines *****/

View File

@ -1,3 +1,41 @@
/**********************************************************
* MIT License
*
* Copyright (c) 2018 LNIS - The University of Utah
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
***********************************************************************/
/************************************************************************
* Filename: fpga_x2p_router.c
* Created by: Xifan Tang
* Change history:
* +-------------------------------------+
* | Date | Author | Notes
* +-------------------------------------+
* | 2019/07/02 | Xifan Tang | Created
* +-------------------------------------+
***********************************************************************/
/************************************************************************
* This file contains a breadth-first router which is tailored for packer
***********************************************************************/
#include <stdio.h>
#include <assert.h>
#include <string.h>
@ -14,6 +52,8 @@
#include "fpga_x2p_rr_graph_utils.h"
#include "fpga_x2p_pb_rr_graph.h"
#include "fpga_x2p_router.h"
void breadth_first_expand_rr_graph_trace_segment(t_rr_graph* local_rr_graph,
t_trace *start_ptr,
int remaining_connections_to_sink) {
@ -145,6 +185,7 @@ void breadth_first_add_source_to_rr_graph_heap(t_rr_graph* local_rr_graph,
/* A copy of breath_first_add_source_to_heap_cluster
* I remove all the use of global variables
*/
static
void breadth_first_add_one_source_to_rr_graph_heap(t_rr_graph* local_rr_graph,
int src_net_index,
int src_idx) {
@ -249,6 +290,7 @@ boolean breadth_first_route_one_net_pb_rr_graph(t_rr_graph* local_rr_graph,
/* Adapt for the multi-source rr_graph routing
*/
static
boolean breadth_first_route_one_single_source_net_pb_rr_graph(t_rr_graph* local_rr_graph,
int inet, int isrc,
int start_isink,
@ -434,6 +476,7 @@ boolean breadth_first_route_one_single_source_net_pb_rr_graph(t_rr_graph* local_
/* Adapt for the multi-source rr_graph routing
*/
static
boolean breadth_first_route_one_multi_source_net_pb_rr_graph(t_rr_graph* local_rr_graph,
int inet) {
@ -550,7 +593,7 @@ boolean breadth_first_route_one_multi_source_net_pb_rr_graph(t_rr_graph* local_r
return route_success;
}
static
boolean feasible_routing_rr_graph(t_rr_graph* local_rr_graph,
boolean verbose) {
@ -845,5 +888,7 @@ boolean try_breadth_first_route_pb_rr_graph(t_rr_graph* local_rr_graph) {
return (FALSE);
}
/************************************************************************
* End of file : fpga_x2p_router.c
***********************************************************************/

View File

@ -80,8 +80,7 @@ static void init_spice_mux_testbench_globals(t_spice spice) {
}
static
void fprint_spice_mux_testbench_global_ports(FILE* fp,
t_spice spice) {
void fprint_spice_mux_testbench_global_ports(FILE* fp) {
/* A valid file handler*/
if (NULL == fp) {
vpr_printf(TIO_MESSAGE_ERROR,"(FILE:%s,LINE[%d])Invalid File Handler!\n",__FILE__, __LINE__);
@ -641,7 +640,6 @@ void fprint_spice_mux_testbench_pb_pin_mux(FILE* fp,
static
void fprint_spice_mux_testbench_pb_graph_node_pin_interc(FILE* fp,
enum e_spice_pin2pin_interc_type pin2pin_interc_type,
t_pb_graph_pin* des_pb_graph_pin,
t_mode* cur_mode,
int select_path_id,
@ -734,7 +732,6 @@ static
void fprint_spice_mux_testbench_pb_pin_interc(FILE* fp,
t_rr_node* pb_rr_graph,
t_phy_pb* des_pb,
enum e_spice_pin2pin_interc_type pin2pin_interc_type,
t_pb_graph_pin* des_pb_graph_pin,
t_mode* cur_mode,
int select_path_id,
@ -856,7 +853,6 @@ void fprintf_spice_mux_testbench_pb_graph_port_interc(FILE* fp,
assert(NULL == cur_pb);
path_id = DEFAULT_PATH_ID;
fprint_spice_mux_testbench_pb_graph_node_pin_interc(fp,
INPUT2INPUT_INTERC,
&(cur_pb_graph_node->input_pins[iport][ipin]),
cur_mode,
path_id,
@ -878,7 +874,6 @@ void fprintf_spice_mux_testbench_pb_graph_port_interc(FILE* fp,
assert(DEFAULT_PATH_ID != path_id);
}
fprint_spice_mux_testbench_pb_pin_interc(fp, pb_rr_nodes, cur_pb, /* TODO: find out the child_pb*/
INPUT2INPUT_INTERC,
&(cur_pb_graph_node->input_pins[iport][ipin]),
cur_mode,
path_id,
@ -896,7 +891,6 @@ void fprintf_spice_mux_testbench_pb_graph_port_interc(FILE* fp,
assert(NULL == cur_pb);
path_id = DEFAULT_PATH_ID;
fprint_spice_mux_testbench_pb_graph_node_pin_interc(fp,
OUTPUT2OUTPUT_INTERC,
&(cur_pb_graph_node->output_pins[iport][ipin]),
cur_mode,
path_id,
@ -918,7 +912,6 @@ void fprintf_spice_mux_testbench_pb_graph_port_interc(FILE* fp,
assert(DEFAULT_PATH_ID != path_id);
}
fprint_spice_mux_testbench_pb_pin_interc(fp, pb_rr_nodes, cur_pb, /* TODO: find out the child_pb*/
OUTPUT2OUTPUT_INTERC,
&(cur_pb_graph_node->output_pins[iport][ipin]),
cur_mode,
path_id,
@ -936,7 +929,6 @@ void fprintf_spice_mux_testbench_pb_graph_port_interc(FILE* fp,
assert(NULL == cur_pb);
path_id = DEFAULT_PATH_ID;
fprint_spice_mux_testbench_pb_graph_node_pin_interc(fp,
INPUT2INPUT_INTERC,
&(cur_pb_graph_node->clock_pins[iport][ipin]),
cur_mode,
path_id,
@ -958,7 +950,6 @@ void fprintf_spice_mux_testbench_pb_graph_port_interc(FILE* fp,
assert(DEFAULT_PATH_ID != path_id);
}
fprint_spice_mux_testbench_pb_pin_interc(fp, pb_rr_nodes, cur_pb, /* TODO: find out the child_pb*/
INPUT2INPUT_INTERC,
&(cur_pb_graph_node->clock_pins[iport][ipin]),
cur_mode,
path_id,
@ -1254,6 +1245,7 @@ void fprint_spice_mux_testbench_cb_one_mux(FILE* fp,
return;
}
static
void fprint_spice_mux_testbench_cb_interc(FILE* fp,
t_cb cur_cb_info,
t_rr_node* src_rr_node,
@ -1481,8 +1473,7 @@ int fprint_spice_mux_testbench_sb_one_mux(FILE* fp,
static
int fprint_spice_mux_testbench_call_one_grid_sb_muxes(FILE* fp,
t_sb cur_sb_info,
t_ivec*** LL_rr_node_indices) {
t_sb cur_sb_info) {
int itrack, side;
int used = 0;
@ -1672,6 +1663,7 @@ void fprint_spice_mux_testbench_measurements(FILE* fp,
}
/* Top-level function in this source file */
static
int fprint_spice_one_mux_testbench(char* formatted_spice_dir,
char* circuit_name,
char* mux_testbench_name,
@ -1743,7 +1735,7 @@ int fprint_spice_one_mux_testbench(char* formatted_spice_dir,
fprint_spice_options(fp, arch.spice->spice_params);
/* Global nodes: Vdd for SRAMs, Logic Blocks(Include IO), Switch Boxes, Connection Boxes */
fprint_spice_mux_testbench_global_ports(fp, *(arch.spice));
fprint_spice_mux_testbench_global_ports(fp);
/* Quote defined Logic blocks subckts (Grids) */
init_spice_mux_testbench_globals(*(arch.spice));
@ -1798,7 +1790,7 @@ int fprint_spice_one_mux_testbench(char* formatted_spice_dir,
case SPICE_SB_MUX_TB:
total_sb_mux_input_density = 0.;
/* Output a sb_mux testbench */
used = fprint_spice_mux_testbench_call_one_grid_sb_muxes(fp, sb_info[grid_x][grid_y], LL_rr_node_indices);
used = fprint_spice_mux_testbench_call_one_grid_sb_muxes(fp, sb_info[grid_x][grid_y]);
/* Check and output info. */
assert((0 == testbench_sb_mux_cnt)||(0 < testbench_sb_mux_cnt));
if (0 < testbench_sb_mux_cnt) {

View File

@ -1,3 +1,45 @@
/**********************************************************
* MIT License
*
* Copyright (c) 2018 LNIS - The University of Utah
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
***********************************************************************/
/************************************************************************
* Filename: spice_routing.c
* Created by: Xifan Tang
* Change history:
* +-------------------------------------+
* | Date | Author | Notes
* +-------------------------------------+
* | 2019/07/02 | Xifan Tang | Created
* +-------------------------------------+
***********************************************************************/
/************************************************************************
* This file contains functions to output SPICE netlists of routing resources
* i.e., Switch Block(SB), Connection Block (CB) and routing channels.
* Each module will be placed in an individual subckt, which will be called
* through SPICE testbenches
* Note that each subckt is a configured module (SB, CB or Routing channels)
***********************************************************************/
/***********************************/
/* SPICE Modeling for VPR */
/* Xifan TANG, EPFL/LSI */
@ -39,7 +81,7 @@
#include "fpga_x2p_backannotate_utils.h"
#include "spice_routing.h"
static
void fprint_routing_chan_subckt(char* subckt_dir,
int x, int y, t_rr_type chan_type,
int LL_num_rr_nodes, t_rr_node* LL_rr_node,
@ -629,11 +671,9 @@ void fprint_switch_box_interc(FILE* fp,
* For channels chanX with INC_DIRECTION on the right side, they should be marked as outputs
* For channels chanX with DEC_DIRECTION on the right side, they should be marked as inputs
*/
static
void fprint_routing_switch_box_subckt(char* subckt_dir,
t_sb cur_sb_info,
int LL_num_rr_nodes, t_rr_node* LL_rr_node,
t_ivec*** LL_rr_node_indices,
boolean compact_routing_hierarchy) {
t_sb cur_sb_info) {
int itrack, inode, side, ix, iy, x, y;
FILE* fp = NULL;
char* fname = NULL;
@ -755,7 +795,15 @@ void fprint_connection_box_short_interc(FILE* fp,
assert(1 == src_rr_node->fan_in);
/* Check the driver*/
drive_rr_node = &(rr_node[src_rr_node->prev_node]);
drive_rr_node = src_rr_node->drive_rr_nodes[0];
/* We have OPINs since we may have direct connections:
* These connections should be handled by other functions in the compact_netlist.c
* So we just return here for OPINs
*/
if (OPIN == drive_rr_node->type) {
return;
}
assert((CHANX == drive_rr_node->type)||(CHANY == drive_rr_node->type));
check_flag = 0;
for (iedge = 0; iedge < drive_rr_node->num_edges; iedge++) {
@ -996,11 +1044,9 @@ void fprint_connection_box_interc(FILE* fp,
* | | Connection | |
* --------------Box_Y[x][y-1]--------------
*/
static
void fprint_routing_connection_box_subckt(char* subckt_dir,
t_cb cur_cb_info,
int LL_num_rr_nodes, t_rr_node* LL_rr_node,
t_ivec*** LL_rr_node_indices,
boolean compact_routing_hierarchy) {
t_cb cur_cb_info) {
int itrack, inode, side, x, y;
int side_cnt = 0;
@ -1104,6 +1150,7 @@ void fprint_routing_connection_box_subckt(char* subckt_dir,
fprintf(fp, "***** Head of scan-chain *****\n");
fprintf(fp, "Rcbx[%d][%d]_sc_head cbx[%d][%d]_sc_head %s[%d]->in 0\n",
x, y, x, y, sram_spice_model->prefix, sram_spice_model->cnt);
break;
case CHANY:
fprintf(fp, "***** Head of scan-chain *****\n");
fprintf(fp, "Rcby[%d][%d]_sc_head cby[%d][%d]_sc_head %s[%d]->in 0\n",
@ -1141,6 +1188,7 @@ void fprint_routing_connection_box_subckt(char* subckt_dir,
fprintf(fp, "***** Tail of scan-chain *****\n");
fprintf(fp, "Rcbx[%d][%d]_sc_tail cbx[%d][%d]_sc_tail %s[%d]->in 0\n",
x, y, x, y, sram_spice_model->prefix, sram_spice_model->cnt);
break;
case CHANY:
fprintf(fp, "***** Tail of scan-chain *****\n");
fprintf(fp, "Rcby[%d][%d]_sc_tail cby[%d][%d]_sc_tail %s[%d]->in 0\n",
@ -1173,8 +1221,7 @@ void generate_spice_routing_resources(char* subckt_dir,
t_arch arch,
t_det_routing_arch* routing_arch,
int LL_num_rr_nodes, t_rr_node* LL_rr_node,
t_ivec*** LL_rr_node_indices,
boolean compact_routing_hierarchy) {
t_ivec*** LL_rr_node_indices) {
int ix, iy;
assert(UNI_DIRECTIONAL == routing_arch->directionality);
@ -1221,9 +1268,7 @@ void generate_spice_routing_resources(char* subckt_dir,
for (ix = 0; ix < (nx + 1); ix++) {
for (iy = 0; iy < (ny + 1); iy++) {
update_spice_models_routing_index_low(ix, iy, SOURCE, arch.spice->num_spice_model, arch.spice->spice_models);
fprint_routing_switch_box_subckt(subckt_dir, sb_info[ix][iy],
LL_num_rr_nodes, LL_rr_node, LL_rr_node_indices,
compact_routing_hierarchy);
fprint_routing_switch_box_subckt(subckt_dir, sb_info[ix][iy]);
update_spice_models_routing_index_high(ix, iy, SOURCE, arch.spice->num_spice_model, arch.spice->spice_models);
}
}
@ -1237,9 +1282,7 @@ void generate_spice_routing_resources(char* subckt_dir,
/* Check if this cbx_info exists, it may be covered by a heterogeneous block */
if ((TRUE == is_cb_exist(CHANX, ix, iy))
&&(0 < count_cb_info_num_ipin_rr_nodes(cbx_info[ix][iy]))) {
fprint_routing_connection_box_subckt(subckt_dir, cbx_info[ix][iy],
LL_num_rr_nodes, LL_rr_node, LL_rr_node_indices,
compact_routing_hierarchy);
fprint_routing_connection_box_subckt(subckt_dir, cbx_info[ix][iy]);
}
update_spice_models_routing_index_high(ix, iy, CHANX, arch.spice->num_spice_model, arch.spice->spice_models);
}
@ -1251,9 +1294,7 @@ void generate_spice_routing_resources(char* subckt_dir,
/* Check if this cby_info exists, it may be covered by a heterogeneous block */
if ((TRUE == is_cb_exist(CHANY, ix, iy))
&&(0 < count_cb_info_num_ipin_rr_nodes(cby_info[ix][iy]))) {
fprint_routing_connection_box_subckt(subckt_dir, cby_info[ix][iy],
LL_num_rr_nodes, LL_rr_node, LL_rr_node_indices,
compact_routing_hierarchy);
fprint_routing_connection_box_subckt(subckt_dir, cby_info[ix][iy]);
}
update_spice_models_routing_index_high(ix, iy, CHANY, arch.spice->num_spice_model, arch.spice->spice_models);
}
@ -1267,3 +1308,6 @@ void generate_spice_routing_resources(char* subckt_dir,
return;
}
/************************************************************************
* End of file : spice_routing.c
***********************************************************************/

View File

@ -67,6 +67,5 @@ void generate_spice_routing_resources(char* subckt_dir,
t_arch arch,
t_det_routing_arch* routing_arch,
int LL_num_rr_nodes, t_rr_node* LL_rr_node,
t_ivec*** LL_rr_node_indices,
boolean compact_routing_hierarchy);
t_ivec*** LL_rr_node_indices);

View File

@ -737,8 +737,7 @@ void generate_spice_subckts(char* subckt_dir,
/* 6. Generate Routing architecture*/
vpr_printf(TIO_MESSAGE_INFO, "Writing Routing Resources....\n");
generate_spice_routing_resources(subckt_dir, (*arch), routing_arch,
num_rr_nodes, rr_node, rr_node_indices,
compact_routing_hierarchy);
num_rr_nodes, rr_node, rr_node_indices);
/* 7. Generate Logic Blocks */
vpr_printf(TIO_MESSAGE_INFO,"Writing Logic Blocks...\n");

View File

@ -1489,6 +1489,7 @@ void fprint_spice_toplevel_one_grid_side_pin_with_given_index(FILE* fp,
}
/* Apply a CLB to CLB direct connection to a SPICE netlist */
static
void fprint_spice_one_clb2clb_direct(FILE* fp,
int from_grid_x, int from_grid_y,
int to_grid_x, int to_grid_y,
@ -1505,7 +1506,7 @@ void fprint_spice_one_clb2clb_direct(FILE* fp,
/* Check bandwidth match between from_clb and to_clb pins */
if (0 != (cur_direct->from_clb_pin_end_index - cur_direct->from_clb_pin_start_index
- cur_direct->to_clb_pin_end_index - cur_direct->to_clb_pin_start_index)) {
- (cur_direct->to_clb_pin_end_index - cur_direct->to_clb_pin_start_index))) {
vpr_printf(TIO_MESSAGE_ERROR, "(%s, [LINE%d]) Unmatch pin bandwidth in direct connection (name=%s)!\n",
__FILE__, __LINE__, cur_direct->name);
exit(1);
@ -1619,6 +1620,7 @@ void fprint_spice_clb2clb_directs(FILE* fp,
* 2. Their corresponding rr_node (SINK or IPIN) has 0 fan-in.
* In these cases, we short connect them to global GND.
*/
static
void fprint_grid_float_port_stimulation(FILE* fp) {
int inode;
int num_float_port = 0;

View File

@ -459,8 +459,11 @@ static void alloc_routing_structs(struct s_router_opts router_opts,
}
build_rr_graph(graph_type, num_types, dummy_type_descriptors, nx, ny, grid,
chan_width_x[0], NULL, det_routing_arch.switch_block_type,
det_routing_arch.Fs, det_routing_arch.num_segment,
chan_width_x[0], NULL,
det_routing_arch.switch_block_type, det_routing_arch.Fs,
det_routing_arch.switch_block_sub_type, det_routing_arch.sub_Fs,
det_routing_arch.wire_opposite_side,
det_routing_arch.num_segment,
det_routing_arch.num_switch, segment_inf,
det_routing_arch.global_route_switch,
det_routing_arch.delayless_switch, timing_inf,

View File

@ -297,8 +297,11 @@ boolean try_route(int width_fac, struct s_router_opts router_opts,
/* Set up the routing resource graph defined by this FPGA architecture. */
build_rr_graph(graph_type, num_types, type_descriptors, nx, ny, grid,
chan_width_x[0], NULL, det_routing_arch.switch_block_type,
det_routing_arch.Fs, det_routing_arch.num_segment,
chan_width_x[0], NULL,
det_routing_arch.switch_block_type, det_routing_arch.Fs,
det_routing_arch.switch_block_sub_type, det_routing_arch.sub_Fs,
det_routing_arch.wire_opposite_side,
det_routing_arch.num_segment,
det_routing_arch.num_switch, segment_inf,
det_routing_arch.global_route_switch,
det_routing_arch.delayless_switch, timing_inf,

View File

@ -17,6 +17,7 @@
#include "ReadOptions.h"
#include "tileable_rr_graph_builder.h"
#include "rr_graph_builder_utils.h"
/* Xifan TANG: SWSEG SUPPORT */
#include "rr_graph_swseg.h"
@ -211,7 +212,9 @@ void build_rr_graph(INP t_graph_type graph_type, INP int L_num_types,
INP t_type_ptr types, INP int L_nx, INP int L_ny,
INP struct s_grid_tile **L_grid, INP int chan_width,
INP struct s_chan_width_dist *chan_capacity_inf,
INP enum e_switch_block_type sb_type, INP int Fs, INP int num_seg_types,
INP enum e_switch_block_type sb_type, INP int Fs,
INP enum e_switch_block_type sb_sub_type, INP int sub_Fs, INP boolean wire_opposite_side,
INP int num_seg_types,
INP int num_switches, INP t_segment_inf * segment_inf,
INP int global_route_switch, INP int delayless_switch,
INP t_timing_inf timing_inf, INP int wire_to_ipin_switch,
@ -225,7 +228,9 @@ void build_rr_graph(INP t_graph_type graph_type, INP int L_num_types,
build_tileable_unidir_rr_graph(L_num_types, types,
L_nx, L_ny, L_grid,
chan_width,
sb_type, Fs, num_seg_types, segment_inf,
sb_type, Fs,
sb_sub_type, sub_Fs, wire_opposite_side,
num_seg_types, segment_inf,
num_switches, delayless_switch,
timing_inf, wire_to_ipin_switch,
base_cost_type, directs, num_directs, ignore_Fc_0, Warnings);
@ -242,6 +247,9 @@ void build_rr_graph(INP t_graph_type graph_type, INP int L_num_types,
}
/* Print statistics of RR graph */
print_rr_graph_stats();
return;
}

View File

@ -28,6 +28,7 @@ void build_rr_graph(INP t_graph_type graph_type,
INP struct s_chan_width_dist *chan_capacity_inf,
INP enum e_switch_block_type sb_type,
INP int Fs,
INP enum e_switch_block_type sb_sub_type, INP int sub_Fs, INP boolean wire_opposite_side,
INP int num_seg_types,
INP int num_switches,
INP t_segment_inf * segment_inf,

View File

@ -692,8 +692,8 @@ static float trans_per_mux(int num_inputs, float trans_sram_bit,
break;
case SPICE_MODEL_STRUCTURE_MULTILEVEL:
assert(1 < target_switch.switch_num_level);
sram_trans = trans_sram_bit * target_switch.switch_num_level;
mux_basis = determine_num_input_basis_multilevel_mux(num_inputs, target_switch.switch_num_level);
sram_trans = trans_sram_bit * target_switch.switch_num_level * mux_basis;
num_second_stage_trans = (int)pow((double)mux_basis, (double)(target_switch.switch_num_level - 1));
pass_trans = ((num_second_stage_trans - 1) * mux_basis/(mux_basis-1)) * pass_trans_area
+ num_inputs * pass_trans_area;

33
vpr7_x2p/vpr/go_ganesh.sh Executable file
View File

@ -0,0 +1,33 @@
#!/bin/bash
# Demo flow for one benchmark:
#   1. remove the previous SPICE/Verilog output directories,
#   2. generate the architecture XML from its template (OPENFPGAPATH is
#      replaced by ${openfpga_path}),
#   3. run VPR on the benchmark with the FPGA-SPICE options listed below.
# All paths are derived from ${PWD} and VPR is invoked as ./vpr, so this
# script has to be sourced from the directory that contains the vpr binary.
# No 'exit' / 'set -e' is used on purpose: the script is sourced and must not
# terminate or reconfigure the caller's shell.
echo "#################################################"
echo "The current shell environment is the following:"
echo "$0"
echo "#################################################"
# Example of how to run vpr
#set circuit_name = pip_add
circuit_name=sync_4bits_add
circuit_blif="${PWD}/Circuits/${circuit_name}.blif"
arch_file="${PWD}/ARCH/k6_N10_scan_chain_ptm45nm_TT.xml"
arch_file_template="${PWD}/ARCH/k6_N10_sram_chain_HC_template.xml"
circuit_act="${PWD}/Circuits/${circuit_name}.act"
circuit_verilog="${PWD}/Circuits/${circuit_name}.v"
spice_output="${PWD}/spice_demo"
verilog_output="${PWD}/verilog_demo"
modelsim_ini=/uusoc/facility/cad_tools/Mentor/modelsim10.7b/modeltech/modelsim.ini
openfpga_path="${PWD}/../.."
# Make sure a clean start
rm -rf -- "${spice_output}"
rm -rf -- "${verilog_output}"
echo "*******************************"
echo "THIS SCRIPT NEEDS TO BE SOURCED"
echo "source ./go_ganesh.sh"
echo "*******************************"
# Generate the architecture file from the template.
# NOTE(review): ':' is the sed delimiter, so a ':' inside ${openfpga_path}
# would break the substitution - confirm this cannot happen on target hosts.
if sed "s:OPENFPGAPATH:${openfpga_path}:g" "${arch_file_template}" > "${arch_file}"; then
  # Pack, place, and route a heterogeneous FPGA
  # Packing uses the AAPack algorithm
  ./vpr "${arch_file}" "${circuit_blif}" \
    --full_stats \
    --nodisp \
    --activity_file "${circuit_act}" \
    --route_chan_width 30 \
    --fpga_spice \
    --fpga_spice_rename_illegal_port \
    --fpga_spice_dir "${spice_output}" \
    --fpga_spice_print_top_testbench
else
  # Do not run VPR on an empty/stale architecture file.
  echo "ERROR: failed to generate ${arch_file} from ${arch_file_template}" >&2
fi