diff --git a/openfpga_flow/benchmarks/micro_benchmark/mac_8/mac_8.v b/openfpga_flow/benchmarks/micro_benchmark/mac_8/mac_8.v new file mode 100644 index 000000000..8175f6ca6 --- /dev/null +++ b/openfpga_flow/benchmarks/micro_benchmark/mac_8/mac_8.v @@ -0,0 +1,22 @@ +//------------------------------------------------------- +// Functionality: An 8-bit multiply-accumulate circuit (a times b plus c, result kept to DATA_WIDTH bits) +// Author: Xifan Tang +//------------------------------------------------------- + +module mac_8(a, b, c, out); +parameter DATA_WIDTH = 8; /* bit width of the operands a, b, c and of the result out; a default value is required */ +input [DATA_WIDTH - 1 : 0] a, b, c; +output [DATA_WIDTH - 1 : 0] out; + +assign out = a * b + c; + +endmodule + + + + + + + + + diff --git a/openfpga_flow/benchmarks/vtr_benchmark/LU32PEEng.v b/openfpga_flow/benchmarks/vtr_benchmark/LU32PEEng.v index ddda843d6..c42c26267 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/LU32PEEng.v +++ b/openfpga_flow/benchmarks/vtr_benchmark/LU32PEEng.v @@ -2763,7 +2763,7 @@ module top_ram ( assign q = sub_wire0 | dummy; wire[32-1:0] dummy; assign dummy = junk_output & 32'b0; - dual_port_ram inst2( + dual_port_ram_4096x32 inst2( .clk (clk), .we1(wren), .we2(1'b0), @@ -3290,7 +3290,7 @@ begin // : STATUS_COUNTER else if ((wrreq) && (!rdreq) && (status_cnt != 64 )) status_cnt <= status_cnt + 1'b1; end - dual_port_ram ram_addr( + dual_port_ram_rfifo ram_addr( .we1 (wrreq) , // write enable .we2 (rdreq) , // Read enable .addr1 (wr_pointer) , // address_0 input @@ -3399,7 +3399,7 @@ begin // : STATUS_COUNTER status_cnt <= status_cnt + 1'b1; end assign usedw = status_cnt[`wFIFOSIZEWIDTH-1:0]; - dual_port_ram ram_addr( + dual_port_ram_wfifo ram_addr( .we1 (wrreq) , // write enable .we2 (rdreq) , // Read enable .addr1 (wr_pointer) , // address_0 input @@ -3473,7 +3473,7 @@ begin // : STATUS_COUNTER else if ((wrreq) && (!rdreq) && (status_cnt != 5'b10000)) status_cnt <= status_cnt + 1; end - dual_port_ram ram_addr( + dual_port_ram_afifo ram_addr( .we1 (wrreq) , // write enable .we2 
(rdreq) , // Read enable .addr1 (wr_pointer) , // address_0 input @@ -3543,7 +3543,7 @@ begin // : STATUS_COUNTER else if ((wrreq) && (!rdreq) && (status_cnt != 16 )) status_cnt <= status_cnt + 1'b1; end - dual_port_ram ram_addr( + dual_port_ram_mfifo ram_addr( .we1 (wrreq) , // write enable .we2 (rdreq) , // Read enable .addr1 (wr_pointer) , // address_0 input @@ -5431,3 +5431,279 @@ module assemble(roundprod, special, y, sign, specialsign, rounded[`WIDTH-2:0]); endmodule + +//--------------------------------------- +// A dual-port RAM +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram ( + input clk, + input we1, + input we2, + input [`rRAMSIZEWIDTH - 1 : 0] addr1, + input [`RAMWIDTH - 1 : 0] data1, + output [`RAMWIDTH - 1 : 0] out1, + input [`rRAMSIZEWIDTH - 1 : 0] addr2, + input [`RAMWIDTH - 1 : 0] data2, + output [`RAMWIDTH - 1 : 0] out2 +); + reg [`RAMWIDTH - 1 : 0] ram[2**`rRAMSIZEWIDTH - 1 : 0]; + reg [`RAMWIDTH - 1 : 0] data_out1; + reg [`RAMWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 4096x32 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_4096x32 ( + input clk, + input we1, + input we2, + input [12 - 1 : 0] addr1, + input [32 - 1 : 0] data1, + output [32 - 1 : 0] out1, + input [12 - 1 : 0] addr2, + input [32 - 1 : 
0] data2, + output [32 - 1 : 0] out2 +); + reg [32 - 1 : 0] ram[2**12 - 1 : 0]; + reg [32 - 1 : 0] data_out1; + reg [32 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM rFIFO +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_rfifo ( + input clk, + input we1, + input we2, + input [`rFIFOSIZEWIDTH - 1 : 0] addr1, + input [`rFIFOINPUTWIDTH - 1 : 0] data1, + output [`rFIFOINPUTWIDTH - 1 : 0] out1, + input [`rFIFOSIZEWIDTH - 1 : 0] addr2, + input [`rFIFOINPUTWIDTH - 1 : 0] data2, + output [`rFIFOINPUTWIDTH - 1 : 0] out2 +); + reg [`rFIFOINPUTWIDTH - 1 : 0] ram[2**`rFIFOSIZEWIDTH - 1 : 0]; + reg [`rFIFOINPUTWIDTH - 1 : 0] data_out1; + reg [`rFIFOINPUTWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A 
dual-port RAM wFIFO +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_wfifo ( + input clk, + input we1, + input we2, + input [`wFIFOSIZEWIDTH - 1 : 0] addr1, + input [`wFIFOINPUTWIDTH - 1 : 0] data1, + output [`wFIFOINPUTWIDTH - 1 : 0] out1, + input [`wFIFOSIZEWIDTH - 1 : 0] addr2, + input [`wFIFOINPUTWIDTH - 1 : 0] data2, + output [`wFIFOINPUTWIDTH - 1 : 0] out2 +); + reg [`wFIFOINPUTWIDTH - 1 : 0] ram[2**`wFIFOSIZEWIDTH - 1 : 0]; + reg [`wFIFOINPUTWIDTH - 1 : 0] data_out1; + reg [`wFIFOINPUTWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM aFIFO +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_afifo ( + input clk, + input we1, + input we2, + input [`aFIFOSIZEWIDTH - 1 : 0] addr1, + input [`aFIFOWIDTH - 1 : 0] data1, + output [`aFIFOWIDTH - 1 : 0] out1, + input [`aFIFOSIZEWIDTH - 1 : 0] addr2, + input [`aFIFOWIDTH - 1 : 0] data2, + output [`aFIFOWIDTH - 1 : 0] out2 +); + reg [`aFIFOWIDTH - 1 : 0] ram[2**`aFIFOSIZEWIDTH - 1 : 0]; + reg [`aFIFOWIDTH - 1 : 0] data_out1; + reg [`aFIFOWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) 
begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM mFIFO +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_mfifo ( + input clk, + input we1, + input we2, + input [`mFIFOSIZEWIDTH - 1 : 0] addr1, + input [`mFIFOWIDTH - 1 : 0] data1, + output [`mFIFOWIDTH - 1 : 0] out1, + input [`mFIFOSIZEWIDTH - 1 : 0] addr2, + input [`mFIFOWIDTH - 1 : 0] data2, + output [`mFIFOWIDTH - 1 : 0] out2 +); + reg [`mFIFOWIDTH - 1 : 0] ram[2**`mFIFOSIZEWIDTH - 1 : 0]; + reg [`mFIFOWIDTH - 1 : 0] data_out1; + reg [`mFIFOWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule diff --git a/openfpga_flow/benchmarks/vtr_benchmark/LU8PEEng.v b/openfpga_flow/benchmarks/vtr_benchmark/LU8PEEng.v index be2fb290c..489ecaa78 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/LU8PEEng.v +++ b/openfpga_flow/benchmarks/vtr_benchmark/LU8PEEng.v @@ -2403,7 +2403,7 @@ module top_ram ( assign q = sub_wire0 | dummy; wire[32-1:0] dummy; assign dummy = junk_output & 32'b0; - dual_port_ram inst2( + dual_port_ram_256x32 inst2( .clk (clk), 
.we1(wren), .we2(1'b0), @@ -2882,7 +2882,7 @@ begin // : STATUS_COUNTER else if ((wrreq) && (!rdreq) && (status_cnt != 64 )) status_cnt <= status_cnt + 1'b1; end - dual_port_ram ram_addr( + dual_port_ram_rfifo ram_addr( .we1 (wrreq) , // write enable .we2 (rdreq) , // Read enable .addr1 (wr_pointer) , // address_0 input @@ -2967,7 +2967,7 @@ begin // : STATUS_COUNTER status_cnt <= status_cnt + 1'b1; end assign usedw = status_cnt[`wFIFOSIZEWIDTH-1:0]; - dual_port_ram ram_addr( + dual_port_ram_wfifo ram_addr( .we1 (wrreq) , // write enable .we2 (rdreq) , // Read enable .addr1 (wr_pointer) , // address_0 input @@ -3041,7 +3041,7 @@ begin // : STATUS_COUNTER else if ((wrreq) && (!rdreq) && (status_cnt != 5'b10000)) status_cnt <= status_cnt + 1; end - dual_port_ram ram_addr( + dual_port_ram_afifo ram_addr( .we1 (wrreq) , // write enable .we2 (rdreq) , // Read enable .addr1 (wr_pointer) , // address_0 input @@ -3111,7 +3111,7 @@ begin // : STATUS_COUNTER else if ((wrreq) && (!rdreq) && (status_cnt != 16 )) status_cnt <= status_cnt + 1'b1; end - dual_port_ram ram_addr( + dual_port_ram_mfifo ram_addr( .we1 (wrreq) , // write enable .we2 (rdreq) , // Read enable .addr1 (wr_pointer) , // address_0 input @@ -4999,3 +4999,279 @@ module assemble(roundprod, special, y, sign, specialsign, rounded[`WIDTH-2:0]); endmodule + +//--------------------------------------- +// A dual-port RAM +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram ( + input clk, + input we1, + input we2, + input [`rRAMSIZEWIDTH - 1 : 0] addr1, + input [`RAMWIDTH - 1 : 0] data1, + output [`RAMWIDTH - 1 : 0] out1, + input [`rRAMSIZEWIDTH - 1 : 0] addr2, + input [`RAMWIDTH - 1 : 0] data2, + output [`RAMWIDTH - 1 : 0] out2 +); + reg [`RAMWIDTH - 1 : 0] ram[2**`rRAMSIZEWIDTH - 1 : 0]; + reg [`RAMWIDTH - 1 : 0] data_out1; + reg [`RAMWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // 
data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 256x32 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_256x32 ( + input clk, + input we1, + input we2, + input [8 - 1 : 0] addr1, + input [32 - 1 : 0] data1, + output [32 - 1 : 0] out1, + input [8- 1 : 0] addr2, + input [32 - 1 : 0] data2, + output [32 - 1 : 0] out2 +); + reg [32 - 1 : 0] ram[2**8 - 1 : 0]; + reg [32 - 1 : 0] data_out1; + reg [32 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM rFIFO +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_rfifo ( + input clk, + input we1, + input we2, + input [`rFIFOSIZEWIDTH - 1 : 0] addr1, + input [`rFIFOINPUTWIDTH - 1 : 0] data1, + output [`rFIFOINPUTWIDTH - 1 : 0] out1, + input [`rFIFOSIZEWIDTH - 1 : 0] addr2, + input [`rFIFOINPUTWIDTH - 1 : 0] 
data2, + output [`rFIFOINPUTWIDTH - 1 : 0] out2 +); + reg [`rFIFOINPUTWIDTH - 1 : 0] ram[2**`rFIFOSIZEWIDTH - 1 : 0]; + reg [`rFIFOINPUTWIDTH - 1 : 0] data_out1; + reg [`rFIFOINPUTWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM wFIFO +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_wfifo ( + input clk, + input we1, + input we2, + input [`wFIFOSIZEWIDTH - 1 : 0] addr1, + input [`wFIFOINPUTWIDTH - 1 : 0] data1, + output [`wFIFOINPUTWIDTH - 1 : 0] out1, + input [`wFIFOSIZEWIDTH - 1 : 0] addr2, + input [`wFIFOINPUTWIDTH - 1 : 0] data2, + output [`wFIFOINPUTWIDTH - 1 : 0] out2 +); + reg [`wFIFOINPUTWIDTH - 1 : 0] ram[2**`wFIFOSIZEWIDTH - 1 : 0]; + reg [`wFIFOINPUTWIDTH - 1 : 0] data_out1; + reg [`wFIFOINPUTWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end 
+ +endmodule + +//--------------------------------------- +// A dual-port RAM aFIFO +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_afifo ( + input clk, + input we1, + input we2, + input [`aFIFOSIZEWIDTH - 1 : 0] addr1, + input [`aFIFOWIDTH - 1 : 0] data1, + output [`aFIFOWIDTH - 1 : 0] out1, + input [`aFIFOSIZEWIDTH - 1 : 0] addr2, + input [`aFIFOWIDTH - 1 : 0] data2, + output [`aFIFOWIDTH - 1 : 0] out2 +); + reg [`aFIFOWIDTH - 1 : 0] ram[2**`aFIFOSIZEWIDTH - 1 : 0]; + reg [`aFIFOWIDTH - 1 : 0] data_out1; + reg [`aFIFOWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM mFIFO +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_mfifo ( + input clk, + input we1, + input we2, + input [`mFIFOSIZEWIDTH - 1 : 0] addr1, + input [`mFIFOWIDTH - 1 : 0] data1, + output [`mFIFOWIDTH - 1 : 0] out1, + input [`mFIFOSIZEWIDTH - 1 : 0] addr2, + input [`mFIFOWIDTH - 1 : 0] data2, + output [`mFIFOWIDTH - 1 : 0] out2 +); + reg [`mFIFOWIDTH - 1 : 0] ram[2**`mFIFOSIZEWIDTH - 1 : 0]; + reg [`mFIFOWIDTH - 1 : 0] data_out1; + reg [`mFIFOWIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always 
@(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule diff --git a/openfpga_flow/benchmarks/vtr_benchmark/boundtop.v b/openfpga_flow/benchmarks/vtr_benchmark/boundtop.v index a749b99d0..0a2dd03ce 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/boundtop.v +++ b/openfpga_flow/benchmarks/vtr_benchmark/boundtop.v @@ -1656,7 +1656,33 @@ single_port_ram new_ram( endmodule - +//--------------------------------------- +// A single-port 1024x32bit RAM: on a clock edge, we high stores data at addr, we low registers ram[addr] onto out +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module single_port_ram ( + input clk, + input we, + input [9:0] addr, + input [31:0] data, + output [31:0] out ); + + reg [31:0] ram[1023:0]; + reg [31:0] internal; + + assign out = internal; + + always @(posedge clk) begin + if(we) begin // write cycle; the enable port is named we (wen was never declared) + ram[addr] <= data; + end + + if(!we) begin // read cycle, mirrors the write-else-read dual-port models (ren was never declared) + internal <= ram[addr]; + end + end + +endmodule diff --git a/openfpga_flow/benchmarks/vtr_benchmark/mcml.v b/openfpga_flow/benchmarks/vtr_benchmark/mcml.v index 1a9b6bb4e..1cbe22670 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/mcml.v +++ b/openfpga_flow/benchmarks/vtr_benchmark/mcml.v @@ -1749,9 +1749,10 @@ wire [31:0] dont_care_out; assign const_zero = 1'b0; assign const_zero_data = 32'b00000000000000000000000000000000; -assign dont_care_out = 32'b00000000000000000000000000000000; +//Comment out for don't care outputs +//assign dont_care_out = 32'b00000000000000000000000000000000; -dual_port_ram dpram1( +dual_port_ram_8192x32 dpram1( .clk (clk), .we1(wren), .we2(const_zero), @@ -1784,9 +1785,10 @@ wire [31:0] dont_care_out; assign const_zero = 1'b0; assign const_zero_data = 32'b00000000000000000000000000000000; 
-assign dont_care_out = 32'b00000000000000000000000000000000; +//Comment out for don't care outputs +//assign dont_care_out = 32'b00000000000000000000000000000000; -dual_port_ram dpram1( +dual_port_ram_8192x32 dpram1( .clk (clk), .we1(wren), .we2(const_zero), @@ -1819,9 +1821,10 @@ wire [31:0] dont_care_out; assign const_zero = 1'b0; assign const_zero_data = 32'b00000000000000000000000000000000; -assign dont_care_out = 32'b00000000000000000000000000000000; +//Comment out for don't care outputs +//assign dont_care_out = 32'b00000000000000000000000000000000; -dual_port_ram dpram1( +dual_port_ram_8192x32 dpram1( .clk (clk), .we1(wren), .we2(const_zero), @@ -1854,9 +1857,10 @@ wire [31:0] dont_care_out; assign const_zero = 1'b0; assign const_zero_data = 32'b00000000000000000000000000000000; -assign dont_care_out = 32'b00000000000000000000000000000000; +//Comment out for don't care outputs +//assign dont_care_out = 32'b00000000000000000000000000000000; -dual_port_ram dpram1( +dual_port_ram_8192x32 dpram1( .clk (clk), .we1(wren), .we2(const_zero), @@ -1888,9 +1892,10 @@ wire [35:0] dont_care_out; assign const_zero = 1'b0; assign const_zero_data = 36'b000000000000000000000000000000000000; -assign dont_care_out = 36'b000000000000000000000000000000000000; +//Comment out for don't care outputs +//assign dont_care_out = 36'b000000000000000000000000000000000000; -dual_port_ram dpram1( +dual_port_ram_65536x36 dpram1( .clk (clk), .we1(wren), .we2(const_zero), @@ -1922,9 +1927,10 @@ wire [17:0] dont_care_out; assign const_zero = 1'b0; assign const_zero_data = 18'b000000000000000000; -assign dont_care_out = 18'b000000000000000000; +//Comment out for don't care outputs +//assign dont_care_out = 18'b000000000000000000; -dual_port_ram dpram1( +dual_port_ram_65536x18 dpram1( .clk (clk), .we1(wren), .we2(const_zero), @@ -1956,9 +1962,10 @@ wire [7:0] dont_care_out; assign const_zero = 1'b0; assign const_zero_data = 8'b00000000; -assign dont_care_out = 8'b00000000; +//Comment out for 
don't care outputs +//assign dont_care_out = 8'b00000000; -dual_port_ram dpram1( +dual_port_ram_65536x8 dpram1( .clk (clk), .we1(wren), .we2(const_zero), @@ -18279,8 +18286,8 @@ output [31:0] cosp; //Instantiate a single port ram for odin wire [31:0]blank; assign blank = 32'b000000000000000000000000000000; -single_port_ram sinp_replace(.clk (clock), .addr (pindex), .data (blank), .we (1'b0), .out (sinp)); -single_port_ram cosp_replace(.clk (clock), .addr (pindex), .data (blank), .we (1'b0), .out (cosp)); +single_port_ram_1024x32 sinp_replace(.clk (clock), .addr (pindex), .data (blank), .we (1'b0), .out (sinp)); +single_port_ram_1024x32 cosp_replace(.clk (clock), .addr (pindex), .data (blank), .we (1'b0), .out (cosp)); endmodule @@ -24774,4 +24781,242 @@ module Sqrt_64b (clk, num_, res); endmodule +//--------------------------------------- +// A dual-port RAM 8192x32 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_8192x32 ( + input clk, + input we1, + input we2, + input [13 - 1 : 0] addr1, + input [32 - 1 : 0] data1, + output [32 - 1 : 0] out1, + input [13 - 1 : 0] addr2, + input [32 - 1 : 0] data2, + output [32 - 1 : 0] out2 +); + reg [32 - 1 : 0] ram[2**13 - 1 : 0]; + reg [32 - 1 : 0] data_out1; + reg [32 - 1 : 0] data_out2; + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 65536x36 +// This module is tuned for 
VTR's benchmarks +//--------------------------------------- +module dual_port_ram_65536x36 ( + input clk, + input we1, + input we2, + input [16 - 1 : 0] addr1, + input [36 - 1 : 0] data1, + output [36 - 1 : 0] out1, + input [16 - 1 : 0] addr2, + input [36 - 1 : 0] data2, + output [36 - 1 : 0] out2 +); + reg [36 - 1 : 0] ram[2**16 - 1 : 0]; + reg [36 - 1 : 0] data_out1; + reg [36 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 65536x18 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_65536x18 ( + input clk, + input we1, + input we2, + input [16 - 1 : 0] addr1, + input [18 - 1 : 0] data1, + output [18 - 1 : 0] out1, + input [16 - 1 : 0] addr2, + input [18 - 1 : 0] data2, + output [18 - 1 : 0] out2 +); + reg [18 - 1 : 0] ram[2**16 - 1 : 0]; + reg [18 - 1 : 0] data_out1; + reg [18 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] 
<= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 65536x8 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_65536x8 ( + input clk, + input we1, + input we2, + input [16 - 1 : 0] addr1, + input [8 - 1 : 0] data1, + output [8 - 1 : 0] out1, + input [16 - 1 : 0] addr2, + input [8 - 1 : 0] data2, + output [8 - 1 : 0] out2 +); + reg [8 - 1 : 0] ram[2**16 - 1 : 0]; + reg [8 - 1 : 0] data_out1; + reg [8 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A single-port RAM, 32 bits wide: on a clock edge, we high stores data at addr, we low registers ram[addr] onto out +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module single_port_ram ( + input clk, + input we, + input [`MANTISSA_PRECISION - 1 : 0] addr, + input [31:0] data, + output [31:0] out ); + + reg [31:0] ram[2**`MANTISSA_PRECISION - 1 : 0]; + reg [31:0] internal; + + assign out = internal; + + always @(posedge clk) begin + if(we) begin // write cycle; the enable port is named we (wen was never declared) + ram[addr] <= data; + end + + if(!we) begin // read cycle, mirrors the write-else-read dual-port models (ren was never declared) + internal <= ram[addr]; + end + end + +endmodule + +//--------------------------------------- +// A single-port 1024x32bit RAM +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module single_port_ram_1024x32 ( + input clk, + input we, + input [9:0] addr, + input [31:0] data, + output [31:0] out ); + + reg 
[31:0] ram[1023:0]; + reg [31:0] internal; + + assign out = internal; + + always @(posedge clk) begin + if(we) begin // write cycle: we high stores data at addr (wen was never declared in this module) + ram[addr] <= data; + end + + if(!we) begin // read cycle: we low registers ram[addr] onto out (ren was never declared in this module) + internal <= ram[addr]; + end + end + +endmodule diff --git a/openfpga_flow/benchmarks/vtr_benchmark/mkDelayWorker32B.v b/openfpga_flow/benchmarks/vtr_benchmark/mkDelayWorker32B.v index d20b58708..430f6a64b 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/mkDelayWorker32B.v +++ b/openfpga_flow/benchmarks/vtr_benchmark/mkDelayWorker32B.v @@ -1503,7 +1503,7 @@ module mkDelayWorker32B(wciS0_Clk, wire [255:0] dp_out_not_used1; wire [255:0] dp_out_not_used2; - dual_port_ram dpram1 ( + dual_port_ram_1024x256 dpram1 ( .clk(wciS0_Clk), .addr1(mesgRF_memory__ADDRA), .addr2(mesgRF_memory__ADDRB), @@ -1521,7 +1521,7 @@ wire [255:0] dp_out_not_used2; // .DATA_WIDTH(32'b1056), // .MEMSIZE(11'b1024)) mesgWF_memory( - dual_port_ram dpram2 ( + dual_port_ram_1024x256 dpram2 ( .clk(wciS0_Clk), .addr1(mesgWF_memory__ADDRA), .addr2(mesgWF_memory__ADDRB), @@ -4083,17 +4083,17 @@ input [`dwa-1:0] din; input we; output [`dwa-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awa-1:0] wp; wire [`awa-1:0] wp_pl1; wire [`awa-1:0] wp_pl2; @@ -4120,7 +4120,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 32'b00000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_16x32 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -4468,17 +4468,17 @@ input [`dwa-1:0] din; input we; output [`dwa-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; 
//////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awa-1:0] wp; wire [`awa-1:0] wp_pl1; wire [`awa-1:0] wp_pl2; @@ -4505,7 +4505,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 32'b00000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_16x32 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -4857,17 +4857,17 @@ input [`dwc-1:0] din; input we; output [`dwc-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awa-1:0] wp; wire [`awa-1:0] wp_pl1; wire [`awa-1:0] wp_pl2; @@ -4894,7 +4894,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 128'b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_16x128 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -5246,17 +5246,17 @@ input [`dwd-1:0] din; input we; output [`dwd-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awa-1:0] wp; wire [`awa-1:0] wp_pl1; wire [`awa-1:0] wp_pl2; @@ -5283,7 +5283,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 128'b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_16x128 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -5636,17 +5636,17 @@ input [`dwc-1:0] din; input we; output [`dwc-1:0] dout; 
input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awc-1:0] wp; wire [`awc-1:0] wp_pl1; wire [`awc-1:0] wp_pl2; @@ -5673,7 +5673,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 60'b000000000000000000000000000000000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_8x60 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -6023,17 +6023,17 @@ input [`dwf-1:0] din; input we; output [`dwf-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awf-1:0] wp; wire [`awf-1:0] wp_pl1; wire [`awf-1:0] wp_pl2; @@ -6060,7 +6060,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 313'b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_8x313 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -6413,17 +6413,17 @@ input [`dwx-1:0] din; input we; output [`dwx-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awx-1:0] wp; wire [`awx-1:0] wp_pl1; 
wire [`awx-1:0] wp_pl2; @@ -6450,7 +6450,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 131'b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_4x131 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -6579,4 +6579,280 @@ always @(posedge clk ) else if(re & (cnt <= (`max_size-`n+1)) & !we) full_n_r <= 1'b0; -endmodule \ No newline at end of file +endmodule + +//--------------------------------------- +// A dual-port RAM 1024x256 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_1024x256 ( + input clk, + input we1, + input we2, + input [10 - 1 : 0] addr1, + input [256 - 1 : 0] data1, + output [256 - 1 : 0] out1, + input [10 - 1 : 0] addr2, + input [256 - 1 : 0] data2, + output [256 - 1 : 0] out2 +); + reg [256 - 1 : 0] ram[2**10 - 1 : 0]; + reg [256 - 1 : 0] data_out1; + reg [256 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If writen enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If writen enable 2 is activated, + // data1 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 16x32 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_16x32 ( + input clk, + input we1, + input we2, + input [4 - 1 : 0] addr1, + input [32 - 1 : 0] data1, + output [32 - 1 : 0] out1, + input [4 - 1 : 0] addr2, + input [32 - 1 : 0] data2, + output [32 - 1 : 0] out2 +); + reg [32 
- 1 : 0] ram[2**4 - 1 : 0]; + reg [32 - 1 : 0] data_out1; + reg [32 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 16x128 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_16x128 ( + input clk, + input we1, + input we2, + input [4 - 1 : 0] addr1, + input [128 - 1 : 0] data1, + output [128 - 1 : 0] out1, + input [4 - 1 : 0] addr2, + input [128 - 1 : 0] data2, + output [128 - 1 : 0] out2 +); + reg [128 - 1 : 0] ram[2**4 - 1 : 0]; + reg [128 - 1 : 0] data_out1; + reg [128 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 8x60 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_8x60 ( + input clk, + input we1, + input we2, + input
[3 - 1 : 0] addr1, + input [60 - 1 : 0] data1, + output [60 - 1 : 0] out1, + input [3 - 1 : 0] addr2, + input [60 - 1 : 0] data2, + output [60 - 1 : 0] out2 +); + reg [60 - 1 : 0] ram[2**3 - 1 : 0]; + reg [60 - 1 : 0] data_out1; + reg [60 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 8x313 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_8x313 ( + input clk, + input we1, + input we2, + input [3 - 1 : 0] addr1, + input [313 - 1 : 0] data1, + output [313 - 1 : 0] out1, + input [3 - 1 : 0] addr2, + input [313 - 1 : 0] data2, + output [313 - 1 : 0] out2 +); + reg [313 - 1 : 0] ram[2**3 - 1 : 0]; + reg [313 - 1 : 0] data_out1; + reg [313 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM
4x131 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_4x131 ( + input clk, + input we1, + input we2, + input [2 - 1 : 0] addr1, + input [131 - 1 : 0] data1, + output [131 - 1 : 0] out1, + input [2 - 1 : 0] addr2, + input [131 - 1 : 0] data2, + output [131 - 1 : 0] out2 +); + reg [131 - 1 : 0] ram[2**2 - 1 : 0]; + reg [131 - 1 : 0] data_out1; + reg [131 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule diff --git a/openfpga_flow/benchmarks/vtr_benchmark/mkPktMerge.v b/openfpga_flow/benchmarks/vtr_benchmark/mkPktMerge.v index 505c46b30..d410baeb3 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/mkPktMerge.v +++ b/openfpga_flow/benchmarks/vtr_benchmark/mkPktMerge.v @@ -516,16 +516,16 @@ input [`dw-1:0] din; input we; output [`dw-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`aw-1:0] wp; wire [`aw-1:0] wp_pl1; @@ -913,17 +913,16 @@ input [`dw-1:0] din; input we; output [`dw-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r;
//////////////////////////////////////////////////////////////////// // // Local Wires // - +wire [1:0] level; reg [`aw-1:0] wp; wire [`aw-1:0] wp_pl1; wire [`aw-1:0] wp_pl2; @@ -1311,17 +1310,17 @@ input [`dw-1:0] din; input we; output [`dw-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`aw-1:0] wp; wire [`aw-1:0] wp_pl1; wire [`aw-1:0] wp_pl2; @@ -1491,4 +1490,49 @@ begin end endmodule - +//--------------------------------------- +// A dual-port RAM +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram ( + input clk, + input we1, + input we2, + input [`aw - 1 : 0] addr1, + input [`dw - 1 : 0] data1, + output [`dw - 1 : 0] out1, + input [`aw - 1 : 0] addr2, + input [`dw - 1 : 0] data2, + output [`dw - 1 : 0] out2 +); + + reg [`dw - 1 : 0] ram[2**`aw - 1 : 0]; + reg [`dw - 1 : 0] data_out1; + reg [`dw - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule diff --git a/openfpga_flow/benchmarks/vtr_benchmark/mkSMAdapter4B.v b/openfpga_flow/benchmarks/vtr_benchmark/mkSMAdapter4B.v index bf033bc57..d5c26e2bf 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/mkSMAdapter4B.v +++
b/openfpga_flow/benchmarks/vtr_benchmark/mkSMAdapter4B.v @@ -3409,17 +3409,17 @@ input [`dwa-1:0] din; input we; output [`dwa-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awa-1:0] wp; wire [`awa-1:0] wp_pl1; wire [`awa-1:0] wp_pl2; @@ -3446,7 +3446,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 60'b000000000000000000000000000000000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_64x60 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -3798,17 +3798,17 @@ input [`dwb-1:0] din; input we; output [`dwb-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awb-1:0] wp; wire [`awb-1:0] wp_pl1; wire [`awb-1:0] wp_pl2; @@ -3835,7 +3835,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 34'b0000000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_4x32 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -4189,17 +4189,17 @@ input [`dwc-1:0] din; input we; output [`dwc-1:0] dout; input re; -output full, full_r; -output empty, empty_r; -output full_n, full_n_r; -output empty_n, empty_n_r; -output [1:0] level; +output full_r; +output empty_r; +output full_n_r; +output empty_n_r; //////////////////////////////////////////////////////////////////// // // Local Wires // +wire [1:0] level; reg [`awc-1:0] wp; wire [`awc-1:0] wp_pl1; wire [`awc-1:0] wp_pl2; @@ -4226,7 +4226,7 @@ reg full_n_r, empty_n_r; // manually assign assign junk_in = 
61'b0000000000000000000000000000000000000000000000000000000000000; -dual_port_ram ram1( +dual_port_ram_8x61 ram1( .clk( clk ), .addr1( rp ), .addr2( wp ), @@ -4373,3 +4373,140 @@ VAL=1'b0; end endmodule +//--------------------------------------- +// A dual-port RAM 64x60 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_64x60 ( + input clk, + input we1, + input we2, + input [6 - 1 : 0] addr1, + input [60 - 1 : 0] data1, + output [60 - 1 : 0] out1, + input [6 - 1 : 0] addr2, + input [60 - 1 : 0] data2, + output [60 - 1 : 0] out2 +); + reg [60 - 1 : 0] ram[2**6 - 1 : 0]; + reg [60 - 1 : 0] data_out1; + reg [60 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If writen enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If writen enable 2 is activated, + // data1 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 4x32 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_4x32 ( + input clk, + input we1, + input we2, + input [2 - 1 : 0] addr1, + input [32 - 1 : 0] data1, + output [32 - 1 : 0] out1, + input [2 - 1 : 0] addr2, + input [32 - 1 : 0] data2, + output [32 - 1 : 0] out2 +); + reg [32 - 1 : 0] ram[2**2 - 1 : 0]; + reg [32 - 1 : 0] data_out1; + reg [32 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If writen enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if 
(we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule + +//--------------------------------------- +// A dual-port RAM 8x61 +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram_8x61 ( + input clk, + input we1, + input we2, + input [3 - 1 : 0] addr1, + input [61 - 1 : 0] data1, + output [61 - 1 : 0] out1, + input [3 - 1 : 0] addr2, + input [61 - 1 : 0] data2, + output [61 - 1 : 0] out2 +); + reg [61 - 1 : 0] ram[2**3 - 1 : 0]; + reg [61 - 1 : 0] data_out1; + reg [61 - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end + +endmodule diff --git a/openfpga_flow/benchmarks/vtr_benchmark/or1200.v b/openfpga_flow/benchmarks/vtr_benchmark/or1200.v index e6f75b38e..df164e27a 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/or1200.v +++ b/openfpga_flow/benchmarks/vtr_benchmark/or1200.v @@ -5234,3 +5234,49 @@ end wire[8:0] unused_signal; assign unused_signal = lsu_op; endmodule + +//--------------------------------------- +// A dual-port RAM +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module dual_port_ram ( + input clk, + input we1, + input
we2, + input [`OR1200_REGFILE_ADDR_WIDTH - 1 : 0] addr1, + input [`OR1200_OPERAND_WIDTH - 1 : 0] data1, + output [`OR1200_OPERAND_WIDTH - 1 : 0] out1, + input [`OR1200_REGFILE_ADDR_WIDTH - 1 : 0] addr2, + input [`OR1200_OPERAND_WIDTH - 1 : 0] data2, + output [`OR1200_OPERAND_WIDTH - 1 : 0] out2 +); + + reg [`OR1200_OPERAND_WIDTH - 1 : 0] ram[2**`OR1200_REGFILE_ADDR_WIDTH - 1 : 0]; + reg [`OR1200_OPERAND_WIDTH - 1 : 0] data_out1; + reg [`OR1200_OPERAND_WIDTH - 1 : 0] data_out2; + + assign out1 = data_out1; + assign out2 = data_out2; + + // If write enable 1 is activated, + // data1 will be loaded through addr1 + // Otherwise, data will be read out through addr1 + always @(posedge clk) begin + if (we1) begin + ram[addr1] <= data1; + end else begin + data_out1 <= ram[addr1]; + end + end + + // If write enable 2 is activated, + // data2 will be loaded through addr2 + // Otherwise, data will be read out through addr2 + always @(posedge clk) begin + if (we2) begin + ram[addr2] <= data2; + end else begin + data_out2 <= ram[addr2]; + end + end +endmodule diff --git a/openfpga_flow/benchmarks/vtr_benchmark/raygentop.v b/openfpga_flow/benchmarks/vtr_benchmark/raygentop.v index 2aaeec7a6..0f4a66b43 100755 --- a/openfpga_flow/benchmarks/vtr_benchmark/raygentop.v +++ b/openfpga_flow/benchmarks/vtr_benchmark/raygentop.v @@ -2974,3 +2974,30 @@ module fifo3 (datain, writeen, dataout, shiften, globalreset, clk); end endmodule +//--------------------------------------- +// A single-port 256x21bit RAM +// This module is tuned for VTR's benchmarks +//--------------------------------------- +module single_port_ram ( + input clk, + input we, + input [7:0] addr, + input [20:0] data, + output [20:0] out ); + + reg [20:0] ram[255:0]; + reg [20:0] internal; + + assign out = internal; + + always @(posedge clk) begin + if(we) begin // write cycle: store data at addr (was undeclared 'wen') + ram[addr] <= data; + end + + if(!we) begin // read cycle: register the word at addr (was undeclared 'ren') + internal <= ram[addr]; + end + end + +endmodule diff --git a/openfpga_flow/misc/fpgaflow_default_tool_path.conf
b/openfpga_flow/misc/fpgaflow_default_tool_path.conf index 99555d626..62220e535 100644 --- a/openfpga_flow/misc/fpgaflow_default_tool_path.conf +++ b/openfpga_flow/misc/fpgaflow_default_tool_path.conf @@ -19,6 +19,9 @@ valid_flows = vpr_blif,yosys_vpr [DEFAULT_PARSE_RESULT_VPR] # parser format = , clb_blocks = "Netlist clb blocks: ([0-9]+)", str +io_blocks = "Netlist io blocks: ([0-9]+)", str +mult_blocks = "Netlist mult_36 blocks: ([0-9]+)", str +memory_blocks = "Netlist memory blocks: ([0-9]+)", str logic_delay = "Total logic delay: ([0-9.]+)", str total_net_delay = "total net delay: ([0-9.]+)", str total_routing_area = "Total routing area: ([0-9.]+)", str diff --git a/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys b/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys new file mode 100644 index 000000000..a81474999 --- /dev/null +++ b/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys @@ -0,0 +1,105 @@ +# Yosys synthesis script for ${TOP_MODULE} + +######################### +# Parse input files +######################### +# Read verilog files +${READ_VERILOG_FILE} +# Read technology library +read_verilog -lib -specify ${YOSYS_CELL_SIM_VERILOG} + +######################### +# Prepare for synthesis +######################### +# Identify top module from hierarchy +hierarchy -check -top ${TOP_MODULE} +# - Convert process blocks to AST +proc +# Flatten all the gates/primitives +flatten +# Identify tri-state buffers from 'z' signal in AST +# with follow-up optimizations to clean up AST +tribuf -logic +opt_expr +opt_clean +# demote inout ports to input or output port +# with follow-up optimizations to clean up AST +deminout +opt + +opt_expr +opt_clean +check +opt +wreduce -keepdc +peepopt +pmuxtree +opt_clean + +######################## +# Map multipliers +# Inspired from synth_xilinx.cc +######################### +# Avoid merging any registers into DSP, reserve memory port registers first +memory_dff +wreduce t:$mul +techmap -map +/mul2dsp.v -map 
${YOSYS_DSP_MAP_VERILOG} ${YOSYS_DSP_MAP_PARAMETERS} +select a:mul2dsp +setattr -unset mul2dsp +opt_expr -fine +wreduce +select -clear +chtype -set $mul t:$__soft_mul # Extract arithmetic functions + +######################### +# Run coarse synthesis +######################### +# Run a tech map with default library +techmap +alumacc +share +opt +fsm +# Run a quick follow-up optimization to sweep out unused nets/signals +opt -fast +# Optimize any memory cells by merging share-able ports and collecting all the ports belonging to memory cells +memory -nomap +opt_clean + +######################### +# Map logics to BRAMs +######################### +memory_bram -rules ${YOSYS_BRAM_MAP_RULES} +techmap -map ${YOSYS_BRAM_MAP_VERILOG} +opt -fast -mux_undef -undriven -fine +memory_map +opt -undriven -fine + +######################### +# Map flip-flops +######################### +techmap -map +/adff2dff.v +opt_expr -mux_undef +simplemap +opt_expr +opt_merge +opt_rmdff +opt_clean +opt + +######################### +# Map LUTs +######################### +abc -lut ${LUT_SIZE} + +######################### +# Check and show statistics +######################### +hierarchy -check +stat + +######################### +# Output netlists +######################### +opt_clean -purge +write_blif ${OUTPUT_BLIF} diff --git a/openfpga_flow/misc/ys_tmpl_yosys_vpr_dsp_flow.ys b/openfpga_flow/misc/ys_tmpl_yosys_vpr_dsp_flow.ys new file mode 100644 index 000000000..849ea9811 --- /dev/null +++ b/openfpga_flow/misc/ys_tmpl_yosys_vpr_dsp_flow.ys @@ -0,0 +1,96 @@ +# Yosys synthesis script for ${TOP_MODULE} + +######################### +# Parse input files +######################### +# Read verilog files +${READ_VERILOG_FILE} +# Read technology library +read_verilog -lib -specify ${YOSYS_CELL_SIM_VERILOG} + +######################### +# Prepare for synthesis +######################### +# Identify top module from hierarchy +hierarchy -check -top ${TOP_MODULE} +# - Convert process blocks to AST +proc +#
Flatten all the gates/primitives +flatten +# Identify tri-state buffers from 'z' signal in AST +# with follow-up optimizations to clean up AST +tribuf -logic +opt_expr +opt_clean +# demote inout ports to input or output port +# with follow-up optimizations to clean up AST +deminout +opt + +opt_expr +opt_clean +check +opt +wreduce -keepdc +peepopt +pmuxtree +opt_clean + +######################## +# Map multipliers +# Inspired from synth_xilinx.cc +######################### +# Avoid merging any registers into DSP, reserve memory port registers first +memory_dff +wreduce t:$mul +techmap -map +/mul2dsp.v -map ${YOSYS_DSP_MAP_VERILOG} ${YOSYS_DSP_MAP_PARAMETERS} +select a:mul2dsp +setattr -unset mul2dsp +opt_expr -fine +wreduce +select -clear +chtype -set $mul t:$__soft_mul # Extract arithmetic functions + +######################### +# Run coarse synthesis +######################### +# Run a tech map with default library +techmap +alumacc +share +opt +fsm +# Run a quick follow-up optimization to sweep out unused nets/signals +opt -fast +# Optimize any memory cells by merging share-able ports and collecting all the ports belonging to memory cells +memory -nomap +opt_clean + +######################### +# Map flip-flops +######################### +techmap -map +/adff2dff.v +opt_expr -mux_undef +simplemap +opt_expr +opt_merge +opt_rmdff +opt_clean +opt + +######################### +# Map LUTs +######################### +abc -lut ${LUT_SIZE} + +######################### +# Check and show statistics +######################### +hierarchy -check +stat + +######################### +# Output netlists +######################### +opt_clean -purge +write_blif ${OUTPUT_BLIF} \ No newline at end of file diff --git a/openfpga_flow/openfpga_arch/k4_frac_N8_reset_softadder_register_scan_chain_dsp8_caravel_io_skywater130nm_fdhd_cc_openfpga.xml b/openfpga_flow/openfpga_arch/k4_frac_N8_reset_softadder_register_scan_chain_dsp8_caravel_io_skywater130nm_fdhd_cc_openfpga.xml new file mode
100644 index 000000000..ebef951da --- /dev/null +++ b/openfpga_flow/openfpga_arch/k4_frac_N8_reset_softadder_register_scan_chain_dsp8_caravel_io_skywater130nm_fdhd_cc_openfpga.xml @@ -0,0 +1,290 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + 10e-12 + + + 10e-12 + + + + + + + + + + + + 10e-12 5e-12 + + + 10e-12 5e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_40nm_openfpga.xml b/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_40nm_openfpga.xml similarity index 93% rename from openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_40nm_openfpga.xml rename to openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_40nm_openfpga.xml index 37beb67bf..ed8f0134c 100644 --- a/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_40nm_openfpga.xml +++ b/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_40nm_openfpga.xml @@ -206,6 +206,16 @@ + + + + + + + + + + @@ -265,6 +275,10 @@ + + + + diff --git a/openfpga_flow/openfpga_cell_library/verilog/mult_8x8.v b/openfpga_flow/openfpga_cell_library/verilog/mult_8x8.v new file mode 100644 index 000000000..a8649488b --- /dev/null +++ b/openfpga_flow/openfpga_cell_library/verilog/mult_8x8.v @@ -0,0 +1,16 @@ +//----------------------------------------------------- +// Design Name : mult_8x8 +// File Name : mult_8x8.v +// Function : A 8-bit multiplier +// Coder : Xifan Tang +//----------------------------------------------------- + +module mult_8x8 ( 
+ input [0:7] A, + input [0:7] B, + output [0:15] Y +); + + assign Y = A * B; + +endmodule diff --git a/openfpga_flow/openfpga_shell_scripts/fix_heterogeneous_device_example_script.openfpga b/openfpga_flow/openfpga_shell_scripts/fix_heterogeneous_device_example_script.openfpga new file mode 100644 index 000000000..10ed5f17a --- /dev/null +++ b/openfpga_flow/openfpga_shell_scripts/fix_heterogeneous_device_example_script.openfpga @@ -0,0 +1,74 @@ +# Run VPR for the 'and' design +#--write_rr_graph example_rr_graph.xml +vpr ${VPR_ARCH_FILE} ${VPR_TESTBENCH_BLIF} --clock_modeling route --device ${OPENFPGA_VPR_DEVICE_LAYOUT} + +# Read OpenFPGA architecture definition +read_openfpga_arch -f ${OPENFPGA_ARCH_FILE} + +# Read OpenFPGA simulation settings +read_openfpga_simulation_setting -f ${OPENFPGA_SIM_SETTING_FILE} + +# Annotate the OpenFPGA architecture to VPR data base +# to debug use --verbose options +link_openfpga_arch --sort_gsb_chan_node_in_edges + +# Check and correct any naming conflicts in the BLIF netlist +check_netlist_naming_conflict --fix --report ./netlist_renaming.xml + +# Apply fix-up to clustering nets based on routing results +pb_pin_fixup --verbose + +# Apply fix-up to Look-Up Table truth tables based on packing results +lut_truth_table_fixup + +# Build the module graph +# - Enabled compression on routing architecture modules +# - Enable pin duplication on grid modules +build_fabric --compress_routing #--verbose + +# Write the fabric hierarchy of module graph to a file +# This is used by hierarchical PnR flows +write_fabric_hierarchy --file ./fabric_hierarchy.txt + +# Repack the netlist to physical pbs +# This must be done before bitstream generator and testbench generation +# Strongly recommend it is done after all the fix-up have been applied +repack #--verbose + +# Build the bitstream +# - Output the fabric-independent bitstream to a file +build_architecture_bitstream --verbose --write_file fabric_independent_bitstream.xml + +# Build 
fabric-dependent bitstream +build_fabric_bitstream --verbose + +# Write fabric-dependent bitstream +write_fabric_bitstream --file fabric_bitstream.xml --format xml + +# Write the Verilog netlist for FPGA fabric +# - Enable the use of explicit port mapping in Verilog netlist +write_fabric_verilog --file ./SRC --explicit_port_mapping --include_timing --print_user_defined_template --verbose + +# Write the Verilog testbench for FPGA fabric +# - We suggest the use of same output directory as fabric Verilog netlists +# - Must specify the reference benchmark file if you want to output any testbenches +# - Enable top-level testbench which is a full verification including programming circuit and core logic of FPGA +# - Enable pre-configured top-level testbench which is a fast verification skipping programming phase +# - Simulation ini file is optional and is needed only when you need to interface different HDL simulators using openfpga flow-run scripts +write_verilog_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --print_top_testbench --print_preconfig_top_testbench --print_simulation_ini ./SimulationDeck/simulation_deck.ini --include_signal_init --support_icarus_simulator #--explicit_port_mapping + +# Write the SDC files for PnR backend +# - Turn on every options here +write_pnr_sdc --file ./SDC + +# Write SDC to disable timing for configure ports +write_sdc_disable_timing_configure_ports --file ./SDC/disable_configure_ports.sdc + +# Write the SDC to run timing analysis for a mapped FPGA fabric +write_analysis_sdc --file ./SDC_analysis + +# Finish and exit OpenFPGA +exit + +# Note : +# To run verification at the end of the flow maintain source in ./SRC directory diff --git a/openfpga_flow/openfpga_shell_scripts/vtr_benchmark_example_script.openfpga b/openfpga_flow/openfpga_shell_scripts/vtr_benchmark_example_script.openfpga index 999ba8dfc..04089a450 100644 --- 
a/openfpga_flow/openfpga_shell_scripts/vtr_benchmark_example_script.openfpga +++ b/openfpga_flow/openfpga_shell_scripts/vtr_benchmark_example_script.openfpga @@ -2,7 +2,7 @@ # When the global clock is defined as a port of a tile, clock routing in VPR should be skipped # This is due to the Fc_in of clock port is set to 0 for global wiring #--write_rr_graph example_rr_graph.xml -vpr ${VPR_ARCH_FILE} ${VPR_TESTBENCH_BLIF} +vpr ${VPR_ARCH_FILE} ${VPR_TESTBENCH_BLIF} --route_chan_width ${VPR_ROUTE_CHAN_WIDTH} # Read OpenFPGA architecture definition read_openfpga_arch -f ${OPENFPGA_ARCH_FILE} @@ -22,15 +22,17 @@ link_openfpga_arch --sort_gsb_chan_node_in_edges check_netlist_naming_conflict --fix --report ./netlist_renaming.xml # Apply fix-up to clustering nets based on routing results -pb_pin_fixup --verbose +pb_pin_fixup #--verbose # Apply fix-up to Look-Up Table truth tables based on packing results lut_truth_table_fixup # Build the module graph # - Enabled compression on routing architecture modules -# - Enable pin duplication on grid modules -build_fabric --compress_routing #--verbose +# - Enabled frame view creation to save runtime and memory +# Note that this is turned on when bitstream generation +# is the ONLY purpose of the flow!!! 
+build_fabric --compress_routing --frame_view #--verbose # Write the fabric hierarchy of module graph to a file # This is used by hierarchical PnR flows @@ -51,28 +53,6 @@ build_fabric_bitstream --verbose # Write fabric-dependent bitstream write_fabric_bitstream --file fabric_bitstream.xml --format xml -# Write the Verilog netlist for FPGA fabric -# - Enable the use of explicit port mapping in Verilog netlist -write_fabric_verilog --file ./SRC --explicit_port_mapping --include_timing --print_user_defined_template --verbose - -# Write the Verilog testbench for FPGA fabric -# - We suggest the use of same output directory as fabric Verilog netlists -# - Must specify the reference benchmark file if you want to output any testbenches -# - Enable top-level testbench which is a full verification including programming circuit and core logic of FPGA -# - Enable pre-configured top-level testbench which is a fast verification skipping programming phase -# - Simulation ini file is optional and is needed only when you need to interface different HDL simulators using openfpga flow-run scripts -write_verilog_testbench --file ./SRC --reference_benchmark_file_path ${REFERENCE_VERILOG_TESTBENCH} --print_top_testbench --print_preconfig_top_testbench --print_simulation_ini ./SimulationDeck/simulation_deck.ini --include_signal_init --support_icarus_simulator #--explicit_port_mapping - -# Write the SDC files for PnR backend -# - Turn on every options here -write_pnr_sdc --file ./SDC - -# Write SDC to disable timing for configure ports -write_sdc_disable_timing_configure_ports --file ./SDC/disable_configure_ports.sdc - -# Write the SDC to run timing analysis for a mapped FPGA fabric -write_analysis_sdc --file ./SDC_analysis - # Finish and exit OpenFPGA exit diff --git a/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm_cell_sim.v 
b/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm_cell_sim.v new file mode 100644 index 000000000..1c9940188 --- /dev/null +++ b/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm_cell_sim.v @@ -0,0 +1,12 @@ +//----------------------------- +// 8-bit multiplier +//----------------------------- +module mult_8( + input [0:7] A, + input [0:7] B, + output [0:15] Y +); + +assign Y = A * B; + +endmodule diff --git a/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm_dsp_map.v b/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm_dsp_map.v new file mode 100644 index 000000000..e492482f4 --- /dev/null +++ b/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm_dsp_map.v @@ -0,0 +1,20 @@ +//----------------------------- +// 8-bit multiplier +//----------------------------- +module mult_8x8 ( + input [0:7] A, + input [0:7] B, + output [0:15] Y +); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 0; + parameter B_WIDTH = 0; + parameter Y_WIDTH = 0; + + mult_8 #() _TECHMAP_REPLACE_ ( + .A (A), + .B (B), + .Y (Y) ); + +endmodule diff --git a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_bram.txt b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram.txt similarity index 100% rename from openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_bram.txt rename to openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram.txt diff --git a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_bram_map.v 
b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v similarity index 78% rename from openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_bram_map.v rename to openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v index fdc8bef43..804077258 100644 --- a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_bram_map.v +++ b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v @@ -1,9 +1,9 @@ module $__MY_DPRAM_1024x8 ( - output [7:0] B1DATA, + output [0:7] B1DATA, input CLK1, - input [9:0] B1ADDR, - input [9:0] A1ADDR, - input [7:0] A1DATA, + input [0:9] B1ADDR, + input [0:9] A1ADDR, + input [0:7] A1DATA, input A1EN, input B1EN ); diff --git a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_cell_sim.v b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_cell_sim.v similarity index 68% rename from openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_cell_sim.v rename to openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_cell_sim.v index 9d9c61636..bc8f1206e 100644 --- a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_cell_sim.v +++ b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_cell_sim.v @@ -5,15 +5,15 @@ module dpram_1024x8_core ( input wclk, input wen, - input [9:0] waddr, - input [7:0] data_in, + input [0:9] waddr, + input [0:7] data_in, input rclk, input ren, - input [9:0] raddr, - output [7:0] data_out ); + input [0:9] raddr, + output [0:7] data_out ); - reg [7:0] ram[1023:0]; - reg [7:0] internal; + reg [0:7] ram[0:1023]; + reg [0:7] internal; assign data_out = internal; @@ -40,10 +40,10 @@ module dpram_1024x8 ( input clk, input wen, input ren, - 
input [9:0] waddr, - input [9:0] raddr, - input [7:0] data_in, - output [7:0] data_out ); + input [0:9] waddr, + input [0:9] raddr, + input [0:7] data_in, + output [0:7] data_out ); dpram_1024x8_core memory_0 ( .wclk (clk), @@ -57,3 +57,16 @@ module dpram_1024x8 ( endmodule +//----------------------------- +// 36-bit multiplier +//----------------------------- +module mult_36( + input [0:35] A, + input [0:35] B, + output [0:71] Y +); + +assign Y = A * B; + +endmodule + diff --git a/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v new file mode 100644 index 000000000..977afdb13 --- /dev/null +++ b/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v @@ -0,0 +1,17 @@ +module mult_36x36 ( + input [0:35] A, + input [0:35] B, + output [0:71] Y +); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 0; + parameter B_WIDTH = 0; + parameter Y_WIDTH = 0; + + mult_36 #() _TECHMAP_REPLACE_ ( + .A (A), + .B (B), + .Y (Y) ); + +endmodule diff --git a/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh b/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh index 450bc262c..0033ad8ac 100755 --- a/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh +++ b/openfpga_flow/regression_test_scripts/fpga_verilog_reg_test.sh @@ -44,6 +44,9 @@ run-task fpga_verilog/bram/dpram16k --debug --show_thread_logs echo -e "Testing Verilog generation with 16k block RAMs spanning two columns "; run-task fpga_verilog/bram/wide_dpram16k --debug --show_thread_logs +echo -e "Testing Verilog generation with heterogeneous fabric using 8-bit single-mode multipliers "; +run-task fpga_verilog/dsp/single_mode_mult_8x8 --debug --show_thread_logs + echo -e "Testing Verilog generation with different I/O capacities on each side of an FPGA "; run-task 
fpga_verilog/io/multi_io_capacity --debug --show_thread_logs diff --git a/openfpga_flow/regression_test_scripts/vtr_benchmark_reg_test.sh b/openfpga_flow/regression_test_scripts/vtr_benchmark_reg_test.sh index ead638405..4ca761a3e 100755 --- a/openfpga_flow/regression_test_scripts/vtr_benchmark_reg_test.sh +++ b/openfpga_flow/regression_test_scripts/vtr_benchmark_reg_test.sh @@ -8,3 +8,5 @@ PYTHON_EXEC=python3.8 ############################################## echo -e "VTR benchmark regression tests"; run-task benchmark_sweep/vtr_benchmarks --debug --show_thread_logs +# Run a quick but relaxed QoR check for heterogeneous blocks +python3 openfpga_flow/scripts/check_qor.py --reference_csv_file openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/vtr_benchmark_golden_results.csv --check_csv_file openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/latest/task_result.csv --metric_checklist_csv_file openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/metric_checklist.csv --check_tolerance 0.2,100 diff --git a/openfpga_flow/scripts/check_qor.py b/openfpga_flow/scripts/check_qor.py new file mode 100644 index 000000000..9b55e64c1 --- /dev/null +++ b/openfpga_flow/scripts/check_qor.py @@ -0,0 +1,129 @@ +##################################################################### +# Python script to check if heterogeneous blocks, e.g., RAM and multipliers +# have been inferred during openfpga flow +# # This script will +# - Check the .csv file generated by openfpga task-run to find out +# the number of each type of heterogeneous blocks +##################################################################### + +import os +from os.path import dirname, abspath, isfile +import shutil +import re +import argparse +import logging +import csv + +##################################################################### +# Constants +##################################################################### +csv_name_tag = "name" +csv_metric_tag = "metric" + 
+##################################################################### +# Initialize logger +##################################################################### +logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) + +##################################################################### +# Parse the options +# - [mandatory option] the file path to .csv file +##################################################################### +parser = argparse.ArgumentParser( + description='A checker for hetergeneous block mapping in OpenFPGA flow') +parser.add_argument('--check_csv_file', required=True, + help='Specify the to-be-checked csv file constaining flow-run information') +parser.add_argument('--reference_csv_file', required=True, + help='Specify the reference csv file constaining flow-run information') +parser.add_argument('--metric_checklist_csv_file', required=True, + help='Specify the csv file constaining metrics to be checked') +# By default, allow a 50% tolerance when checking metrics +parser.add_argument('--check_tolerance', default="0.5,1.5", + help='Specify the tolerance when checking metrics. 
Format ,') +args = parser.parse_args() + +##################################################################### +# Check options: +# - Input csv files must be valid +# Otherwise, error out +##################################################################### +if not isfile(args.check_csv_file): + logging.error("Invalid csv file to check: " + args.check_csv_file + "\nFile does not exist!\n") + exit(1) + +if not isfile(args.reference_csv_file): + logging.error("Invalid reference csv file: " + args.reference_csv_file + "\nFile does not exist!\n") + exit(1) + +if not isfile(args.metric_checklist_csv_file): + logging.error("Invalid metric checklist csv file: " + args.metric_checklist_csv_file + "\nFile does not exist!\n") + exit(1) + +##################################################################### +# Parse a checklist for metrics to be checked +##################################################################### +metric_checklist_csv_file = open(args.metric_checklist_csv_file, "r") +metric_checklist_csv_content = csv.DictReader(filter(lambda row : row[0]!='#', metric_checklist_csv_file), delimiter=',') +# Collect the names of the metrics to be checked +metric_checklist = [] +for row in metric_checklist_csv_content: + metric_checklist.append(row[csv_metric_tag]); + +##################################################################### +# Parse the reference csv file +# Skip any line starting with '#', which is treated as a comment +##################################################################### +ref_csv_file = open(args.reference_csv_file, "r") +ref_csv_content = csv.DictReader(filter(lambda row : row[0]!='#', ref_csv_file), delimiter=',') +# Hash the reference results with the name tag +ref_results = {} +for row in ref_csv_content: + ref_results[row[csv_name_tag]] = row; + +##################################################################### +# Parse the tolerance to be applied when checking metrics 
+##################################################################### +lower_bound_factor = float(args.check_tolerance.split(",")[0]) +upper_bound_factor = float(args.check_tolerance.split(",")[1]) + +##################################################################### +# Parse the csv file to check +##################################################################### +with open(args.check_csv_file, newline='') as check_csv_file: + results_to_check = csv.DictReader(check_csv_file, delimiter=',') + checkpoint_count = 0 + check_error_count = 0 + for row in results_to_check: + # Start from line 1 and check information + for metric_to_check in metric_checklist: + # Check if the metric is in a range + if (lower_bound_factor * float(ref_results[row[csv_name_tag]][metric_to_check]) > float(row[metric_to_check])) or (upper_bound_factor * float(ref_results[row[csv_name_tag]][metric_to_check]) < float(row[metric_to_check])) : + # Check QoR failed, error out + logging.error("Benchmark " + str(row[csv_name_tag]) + " failed in checking '" + str(metric_to_check) +"'\n" + "Found: " + str(row[metric_to_check]) + " but expected: " + str(ref_results[row[csv_name_tag]][metric_to_check]) + " outside range [" + str(lower_bound_factor * 100) + "%, " + str(upper_bound_factor * 100) + "%]") + check_error_count += 1 + # Pass this metric check, increase counter + checkpoint_count += 1 + logging.info("Checked " + str(checkpoint_count) + " metrics") + logging.info("See " + str(check_error_count) + " QoR failures") + + if (0 < check_error_count): + exit(1) + +##################################################################### +# Post checked results on stdout: +# reaching here, it means all the checks have passed +##################################################################### +with open(args.check_csv_file, newline='') as check_csv_file: + results_to_check = csv.DictReader(check_csv_file, delimiter=',') + # Print out keywords: name + metric checklist + print(str(csv_name_tag) + " 
", end='') + for metric_to_check in metric_checklist: + print(str(metric_to_check) + " ", end='') + print("") + + for row in results_to_check: + # Start from line 1, print checked metrics + print(row[csv_name_tag] + " ", end='') + for metric_to_check in metric_checklist: + print(row[metric_to_check] + " ", end='') + print("") diff --git a/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/metric_checklist.csv b/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/metric_checklist.csv new file mode 100644 index 000000000..80ebbc544 --- /dev/null +++ b/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/metric_checklist.csv @@ -0,0 +1,6 @@ +########################################################## +# Metrics to check for VTR benchmark bitstream generation +########################################################## +metric +mult_blocks +memory_blocks diff --git a/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/task.conf b/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/task.conf index ee728be59..278b07b8f 100644 --- a/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/task.conf +++ b/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/task.conf @@ -17,23 +17,78 @@ fpga_flow=yosys_vpr [OpenFPGA_SHELL] openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/vtr_benchmark_example_script.openfpga -openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_40nm_openfpga.xml +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k6_frac_N10_adder_chain_dpram8K_dsp36_40nm_openfpga.xml openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml # Yosys script parameters -yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_cell_sim.v 
-yosys_bram_map_rules=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_bram.txt -yosys_bram_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_40nm_bram_map.v +yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_cell_sim.v +yosys_bram_map_rules=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram.txt +yosys_bram_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_bram_map.v +yosys_dsp_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm_dsp_map.v +yosys_dsp_map_parameters=-D DSP_A_MAXWIDTH=36 -D DSP_B_MAXWIDTH=36 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=mult_36x36 +# VPR parameters +# Use a fixed routing channel width to save runtime +vpr_route_chan_width=300 [ARCHITECTURES] -arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_40nm.xml +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm.xml [BENCHMARKS] -bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/ch_intrinsics.v +# Official benchmarks from VTR benchmark release +# Comment out due to high runtime +#bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/bgm.v +bench1=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/blob_merge.v +# Failed due to an unknown error in VPR netlist parser +#bench2=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/boundtop.v +bench3=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/ch_intrinsics.v +bench4=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/diffeq1.v 
+bench5=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/diffeq2.v +# Comment out due to high runtime +#bench6=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/LU8PEEng.v +# Comment out due to high runtime +#bench7=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/LU32PEEng.v +# Comment out due to high runtime +#bench8=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/mcml.v +bench9=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/mkDelayWorker32B.v +bench10=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/mkPktMerge.v +bench11=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/mkSMAdapter4B.v +bench12=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/or1200.v +bench13=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/raygentop.v +bench14=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/sha.v +bench15=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/stereovision0.v +bench16=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/stereovision1.v +# Comment out due to high runtime +#bench17=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/stereovision2.v +bench18=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/stereovision3.v +# Additional benchmarks after VTR benchmark release +#bench19=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/arm_core.v +#bench20=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/spree.v +#bench21=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/vtr_benchmark/LU64PEEng.v [SYNTHESIS_PARAM] -bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_flow.ys +bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_bram_dsp_flow.ys # Benchmark ch_intrinsics -bench0_top = memset +bench0_top = bgm +bench1_top = RLE_BlobMerging +bench2_top = paj_boundtop_hierarchy_no_mem +bench3_top = memset +bench4_top = diffeq_paj_convert 
+bench5_top = diffeq_f_systemC +bench6_top = LU8PEEng +bench7_top = LU32PEEng +bench8_top = mcml +bench9_top = mkDelayWorker32B +bench10_top = mkPktMerge +bench11_top = mkSMAdapter4B +bench12_top = or1200_flat +bench13_top = paj_raygentop_hierarchy_no_mem +bench14_top = sha1 +bench15_top = sv_chip0_hierarchy_no_mem +bench16_top = sv_chip1_hierarchy_no_mem +bench17_top = sv_chip2_hierarchy_no_mem +bench18_top = sv_chip3_hierarchy_no_mem +bench19_top = arm_core +bench20_top = system +bench21_top = LU64PEEng [SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] #end_flow_with_test= diff --git a/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/vtr_benchmark_golden_results.csv b/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/vtr_benchmark_golden_results.csv new file mode 100644 index 000000000..af8842eed --- /dev/null +++ b/openfpga_flow/tasks/benchmark_sweep/vtr_benchmarks/config/vtr_benchmark_golden_results.csv @@ -0,0 +1,28 @@ +##################################################################### +# A database of benchmarks to be checked +# Reference: https://janders.eecg.utoronto.ca/pdfs/p77-rose.pdf +# Name,number of multipliers,number of RAMs +# IMPORTANT: +# - the name is tuned due to the naming convention of openfpga task-run script +# - the limitation should be CHANGED!!! 
+##################################################################### +name,mult_blocks,memory_blocks +00_bgm_MIN_ROUTE_CHAN_WIDTH,11,0 +00_RLE_BlobMerging_MIN_ROUTE_CHAN_WIDTH,0,0 +00_paj_boundtop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,1 +00_memset_MIN_ROUTE_CHAN_WIDTH,0,1 +00_diffeq_paj_convert_MIN_ROUTE_CHAN_WIDTH,5,0 +00_diffeq_f_systemC_MIN_ROUTE_CHAN_WIDTH,5,0 +00_LU8PEEng_MIN_ROUTE_CHAN_WIDTH,8,9 +00_LU32PEEng_MIN_ROUTE_CHAN_WIDTH,32,9 +00_mcml_MIN_ROUTE_CHAN_WIDTH,30,10 +00_mkDelayWorker32B_MIN_ROUTE_CHAN_WIDTH,0,9 +00_mkPktMerge_MIN_ROUTE_CHAN_WIDTH,0,3 +00_mkSMAdapter4B_MIN_ROUTE_CHAN_WIDTH,0,3 +00_or1200_flat_MIN_ROUTE_CHAN_WIDTH,1,2 +00_paj_raygentop_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,18,1 +00_sha1_MIN_ROUTE_CHAN_WIDTH,0,0 +00_sv_chip0_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,0 +00_sv_chip1_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,152,0 +00_sv_chip2_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,564,0 +00_sv_chip3_hierarchy_no_mem_MIN_ROUTE_CHAN_WIDTH,0,0 diff --git a/openfpga_flow/tasks/fpga_verilog/dsp/single_mode_mult_8x8/config/task.conf b/openfpga_flow/tasks/fpga_verilog/dsp/single_mode_mult_8x8/config/task.conf new file mode 100644 index 000000000..1c84a02f7 --- /dev/null +++ b/openfpga_flow/tasks/fpga_verilog/dsp/single_mode_mult_8x8/config/task.conf @@ -0,0 +1,43 @@ +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# Configuration file for running experiments +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +# timeout_each_job : FPGA Task script splits fpga flow into multiple jobs +# Each job execute fpga_flow script on combination of architecture & benchmark +# timeout_each_job is timeout for each job +# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + +[GENERAL] +run_engine=openfpga_shell +power_tech_file = ${PATH:OPENFPGA_PATH}/openfpga_flow/tech/PTM_45nm/45nm.xml +power_analysis = false +spice_output=false +verilog_output=true +timeout_each_job = 20*60 
+fpga_flow=yosys_vpr + +[OpenFPGA_SHELL] +openfpga_shell_template=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_shell_scripts/fix_heterogeneous_device_example_script.openfpga +openfpga_arch_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_arch/k4_frac_N8_reset_softadder_register_scan_chain_dsp8_caravel_io_skywater130nm_fdhd_cc_openfpga.xml +openfpga_sim_setting_file=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_simulation_settings/fixed_sim_openfpga.xml +# Yosys script parameters +yosys_cell_sim_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm_cell_sim.v +yosys_dsp_map_verilog=${PATH:OPENFPGA_PATH}/openfpga_flow/openfpga_yosys_techlib/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm_dsp_map.v +yosys_dsp_map_parameters=-D DSP_A_MAXWIDTH=8 -D DSP_B_MAXWIDTH=8 -D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_NAME=mult_8x8 +# VPR parameter +openfpga_vpr_device_layout=3x2 + +[ARCHITECTURES] +arch0=${PATH:OPENFPGA_PATH}/openfpga_flow/vpr_arch/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm.xml + +[BENCHMARKS] +bench0=${PATH:OPENFPGA_PATH}/openfpga_flow/benchmarks/micro_benchmark/mac_8/mac_8.v + +[SYNTHESIS_PARAM] +bench_yosys_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_dsp_flow.ys +bench_yosys_rewrite_common=${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_yosys_vpr_flow_with_rewrite.ys;${PATH:OPENFPGA_PATH}/openfpga_flow/misc/ys_tmpl_rewrite_flow.ys + +bench0_top = mac_8 + +[SCRIPT_PARAM_MIN_ROUTE_CHAN_WIDTH] +end_flow_with_test= +vpr_fpga_verilog_formal_verification_top_netlist= diff --git a/openfpga_flow/vpr_arch/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm.xml b/openfpga_flow/vpr_arch/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm.xml new file mode 100644 index 
000000000..fea61541c --- /dev/null +++ b/openfpga_flow/vpr_arch/k4_frac_N8_tileable_reset_softadder_register_scan_chain_dsp8_nonLR_caravel_io_skywater130nm.xml @@ -0,0 +1,906 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io_top.outpad io_top.inpad + + + + + + + + + + + + io_right.outpad io_right.inpad + + + + + + + + + + + + io_bottom.outpad io_bottom.inpad + + + + + + + + + + + + io_left.outpad io_left.inpad + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + clb.clk clb.reset + clb.reg_in clb.sc_in clb.cin clb.O[7:0] clb.I0 clb.I0i clb.I1 clb.I1i clb.I2 clb.I2i clb.I3 clb.I3i + clb.O[15:8] clb.I4 clb.I4i clb.I5 clb.I5i clb.I6 clb.I6i clb.I7 clb.I7i + clb.reg_out clb.sc_out clb.cout + + + + + + + + + + + + + + + + mult_8.a[0:5] mult_8.b[0:5] mult_8.out[0:10] + mult_8.a[6:7] mult_8.b[6:7] mult_8.out[11:15] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 + 1 + + + + 1 1 1 + 1 1 + + + + 1 1 1 1 1 + 1 1 1 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 235e-12 + 235e-12 + 235e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 261e-12 + 261e-12 + 261e-12 + 261e-12 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_40nm.xml b/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm.xml similarity index 91% rename from openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_40nm.xml rename to openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm.xml index d697a1e8d..a34b29c22 100644 --- a/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_40nm.xml +++ b/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_chain_dpram8K_dsp36_40nm.xml @@ -138,6 +138,15 @@ + + + + + + + + + @@ -196,6 +205,23 @@ memory.waddr[9:5] memory.raddr[9:5] memory.data_in[7:4] memory.ren memory.data_out[7:4] + + + + + + + + + + + + mult_36.b[0:9] mult_36.b[10:35] mult_36.out[36:71] + + mult_36.a[0:9] mult_36.a[10:35] mult_36.out[0:35] + + + @@ -208,6 +234,8 @@ + + @@ -686,6 +714,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_register_scan_chain_mem16K_depop50_12nm.xml b/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_register_scan_chain_mem16K_depop50_12nm.xml index baada7911..b6dad0a8f 100755 --- a/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_register_scan_chain_mem16K_depop50_12nm.xml +++ b/openfpga_flow/vpr_arch/k6_frac_N10_tileable_adder_register_scan_chain_mem16K_depop50_12nm.xml @@ -193,7 +193,16 @@ - + + + + + + + + + +