// OpenFPGA/openfpga_flow/benchmarks/vtr_benchmark/LU32PEEng.v
//
// 5710 lines
// 159 KiB
// Verilog
// Executable File

//auto-generated top.v
//top level module of LU factorization
//by Wei Zhang
// Width/size constants for the top level. Each macro expands to a sized
// binary literal; the decimal value is noted next to it.
// NWIDTH = 20: bit width of the matrix-size input N.
`define NWIDTH 6'b010100
// BLOCKWIDTH = 7: bit width of the block dimension signals (m, n, loop).
`define BLOCKWIDTH 4'b0111
// DDRWIDTH = 32. Not referenced in the visible part of the file;
// presumably the external DDR data width -- TODO confirm.
`define DDRWIDTH 7'b0100000
// DDRNUMDQS = 4. Not referenced in the visible part of the file.
`define DDRNUMDQS 4'b0100
// DDRSIZEWIDTH = 24: bit width of external memory addresses (offset, mem_local_addr).
`define DDRSIZEWIDTH 6'b011000
// BURSTLEN = 2: bit width of mem_local_size.
`define BURSTLEN 3'b010
// MEMCONWIDTH = 64: bit width of mem_local_wdata / mem_local_rdata.
`define MEMCONWIDTH 8'b01000000
// MEMCONNUMBYTES = 8: bit width of mem_local_be (one bit per byte of MEMCONWIDTH).
`define MEMCONNUMBYTES 5'b01000
// RAMWIDTH = 1024: bit width of the on-chip RAM data buses.
`define RAMWIDTH 12'b010000000000
// RAMNUMBYTES = 128: bit width of the on-chip RAM byte-enable buses.
`define RAMNUMBYTES 9'b010000000
// RAMSIZEWIDTH = 7: bit width of the on-chip RAM address buses.
`define RAMSIZEWIDTH 4'b0111
// TOPWIDTH = 32. Not referenced in the visible part of the file.
`define TOPWIDTH 7'b0100000
// FIFO widths (64, 1024, 28 and 7 respectively). Not referenced in the
// visible part of the file; presumably used by the data transfer unit's
// FIFOs -- TODO confirm.
`define rFIFOINPUTWIDTH 8'b01000000
`define wFIFOINPUTWIDTH 12'b010000000000
`define mFIFOWIDTH 6'b011100
`define aFIFOWIDTH 4'b0111
// LU32PEEng -- top level of the LU factorization engine.
//
// Wires together three sub-blocks:
//   MC        (MarshallerController) : sequences the computation and issues
//                                      transfer requests to the DTU.
//   compBlock (LU)                   : computes the LU factorization, with the
//                                      answer stored back into its RAM blocks.
//   DTU       (DataTransferUnit)     : moves data between the mem_local_*
//                                      memory-controller port and the RAM
//                                      ports of compBlock.
//
// The port order below is unchanged. The generator's commented-out DDR pins
// and dummy "junk" outputs were never real ports and are not listed.
module LU32PEEng (
    clk,
    start, N, offset, done,
    burst_begin,
    mem_local_be,
    mem_local_read_req,
    mem_local_size,
    mem_local_wdata,
    mem_local_write_req,
    mem_local_rdata,
    mem_local_rdata_valid,
    mem_local_ready,
    mem_local_wdata_req,
    reset_n,
    mem_local_addr
);

    // ---- control ----------------------------------------------------------
    input                        clk;
    input                        reset_n;
    input                        start;
    input  [`NWIDTH-1:0]         N;
    input  [`DDRSIZEWIDTH-1:0]   offset;
    output                       done;

    // ---- memory-controller side (driven / consumed by the DTU) ------------
    output                       burst_begin;
    output [`MEMCONNUMBYTES-1:0] mem_local_be;
    output                       mem_local_read_req;
    output [`BURSTLEN-1:0]       mem_local_size;
    output [`MEMCONWIDTH-1:0]    mem_local_wdata;
    output                       mem_local_write_req;
    output [`DDRSIZEWIDTH-1:0]   mem_local_addr;
    input  [`MEMCONWIDTH-1:0]    mem_local_rdata;
    input                        mem_local_rdata_valid;
    input                        mem_local_ready;
    input                        mem_local_wdata_req;

    // ---- controller <-> compute block -------------------------------------
    wire [`BLOCKWIDTH-1:0]   m, n, loop;
    wire [1:0]               mode;
    wire                     comp_start, comp_done;
    wire                     curMemSel, leftMemSel;

    // ---- controller <-> data transfer unit --------------------------------
    wire                     dtu_write_req, dtu_read_req, dtu_ack, dtu_done;
    wire [`DDRSIZEWIDTH-1:0] dtu_mem_addr;
    wire [`RAMSIZEWIDTH-1:0] dtu_ram_addr;
    wire [`BLOCKWIDTH-1:0]   dtu_size;
    wire                     left_sel;

    // ---- memory-side ports of the compute block's "current" RAMs ----------
    wire [`RAMWIDTH-1:0]     curWriteDataMem, curReadDataMem;
    wire [`RAMSIZEWIDTH-1:0] curWriteAddrMem, curReadAddrMem;
    wire [`RAMNUMBYTES-1:0]  curWriteByteEnMem;
    wire                     curWriteEnMem;

    // ---- memory-side write port of the compute block's "left" RAMs --------
    wire [`RAMWIDTH-1:0]     leftWriteDataMem;
    wire [`RAMSIZEWIDTH-1:0] leftWriteAddrMem;
    wire [`RAMNUMBYTES-1:0]  leftWriteByteEnMem;
    wire                     leftWriteEnMem;

    // ---- RAM-side bus of the data transfer unit ---------------------------
    wire [`RAMWIDTH-1:0]     ram_write_data, ram_read_data;
    wire [`RAMSIZEWIDTH-1:0] ram_write_addr, ram_read_addr;
    wire [`RAMNUMBYTES-1:0]  ram_write_byte_en;
    wire                     ram_write_en;

    MarshallerController MC (
        .clk           (clk),
        .start         (start),
        .done          (done),
        .input_N       (N),
        .offset        (offset),
        .comp_start    (comp_start),
        .block_m       (m),
        .block_n       (n),
        .loop          (loop),
        .mode          (mode),
        .comp_done     (comp_done),
        .cur_mem_sel   (curMemSel),
        .left_mem_sel  (leftMemSel),
        .dtu_write_req (dtu_write_req),
        .dtu_read_req  (dtu_read_req),
        .dtu_mem_addr  (dtu_mem_addr),
        .dtu_ram_addr  (dtu_ram_addr),
        .dtu_size      (dtu_size),
        .dtu_ack       (dtu_ack),
        .dtu_done      (dtu_done),
        .left_sel      (left_sel)
    );

    // block that computes the LU factorization, with answer stored back into ram block
    LU compBlock (
        .clk                (clk),
        .start              (comp_start),
        .m                  (m),
        .n                  (n),
        .loop               (loop),
        .mode               (mode),
        .done               (comp_done),
        .curReadAddrMem     (curReadAddrMem),
        .curReadDataMem     (curReadDataMem),
        .curWriteByteEnMem  (curWriteByteEnMem),
        .curWriteDataMem    (curWriteDataMem),
        .curWriteAddrMem    (curWriteAddrMem),
        .curWriteEnMem      (curWriteEnMem),
        .curMemSel          (curMemSel),
        .leftWriteByteEnMem (leftWriteByteEnMem),
        .leftWriteDataMem   (leftWriteDataMem),
        .leftWriteAddrMem   (leftWriteAddrMem),
        .leftWriteEnMem     (leftWriteEnMem),
        .leftMemSel         (leftMemSel)
    );

    DataTransferUnit DTU (
        .clk               (clk),
        .reset_n           (reset_n),
        // request interface from the controller
        .dtu_write_req     (dtu_write_req),
        .dtu_read_req      (dtu_read_req),
        .dtu_mem_addr      (dtu_mem_addr),
        .dtu_ram_addr      (dtu_ram_addr),
        .dtu_size          (dtu_size),
        .dtu_ack           (dtu_ack),
        .dtu_done          (dtu_done),
        // on-chip RAM side
        .ram_read_addr     (ram_read_addr),
        .ram_read_data     (ram_read_data),
        .ram_write_byte_en (ram_write_byte_en),
        .ram_write_data    (ram_write_data),
        .ram_write_addr    (ram_write_addr),
        .ram_write_en      (ram_write_en),
        // memory-controller side
        .mem_rdata         (mem_local_rdata),
        .mem_rdata_valid   (mem_local_rdata_valid),
        .mem_ready         (mem_local_ready),
        .mem_wdata_req     (mem_local_wdata_req),
        .burst_begin       (burst_begin),
        .mem_local_addr    (mem_local_addr),
        .mem_be            (mem_local_be),
        .mem_read_req      (mem_local_read_req),
        .mem_size          (mem_local_size),
        .mem_wdata         (mem_local_wdata),
        .mem_write_req     (mem_local_write_req)
    );

    // The DTU reads only from the "current" RAM port.
    assign curReadAddrMem     = ram_read_addr;
    assign ram_read_data      = curReadDataMem;

    // The DTU write bus is broadcast to both the "current" and "left" write
    // ports; left_sel decides which of the two actually sees the write enable.
    assign curWriteDataMem    = ram_write_data;
    assign curWriteAddrMem    = ram_write_addr;
    assign curWriteByteEnMem  = ram_write_byte_en;
    assign curWriteEnMem      = ram_write_en && !left_sel;

    assign leftWriteDataMem   = ram_write_data;
    assign leftWriteAddrMem   = ram_write_addr;
    assign leftWriteByteEnMem = ram_write_byte_en;
    assign leftWriteEnMem     = ram_write_en && left_sel;

endmodule
// Constants for MarshallerController.
// BLOCKM / BLOCKN = 64: step by which mcount / ncount advance and the
// default value loaded into block_m / block_n.
`define BLOCKM 8'b01000000
`define BLOCKN 8'b01000000
// BLOCKMDIVK = 2: per-request RAM address increment and the transfer size
// handed to the data transfer unit.
`define BLOCKMDIVK 3'b010
// MEMBLOCKM / MEMBLOCKN = 32: external-memory address step used when the
// controller advances its block pointers.
`define MEMBLOCKM 7'b0100000
`define MEMBLOCKN 7'b0100000
// Widths repeated from the top-level header with identical values
// (20, 7, 24 and 7 bits respectively).
`define NWIDTH 6'b010100
`define BLOCKWIDTH 4'b0111
`define DDRSIZEWIDTH 6'b011000
`define RAMSIZEWIDTH 4'b0111
// State encodings of the block-algorithm FSM (cur_state / next_state).
// The trailing comment on each line is the decimal value.
`define START 1'b0 //0
`define SETUP 2'b01 //1
`define FIRST 3'b010 //2
`define MODE0_SETUP 3'b011 //3
`define MODE0_WAIT 4'b0100 //4
`define MODE0 4'b0101 //5
`define MODE1_SETUP 4'b0110 //6
`define MODE1_WAIT 4'b0111 //7
`define MODE1 5'b01000 //8
`define MODE2_SETUP 5'b01001 //9
`define MODE2_WAIT 5'b01010 //10
`define MODE2 5'b01011 //11
`define MODE3_SETUP 5'b01100 //12
`define MODE3_WAIT 5'b01101 //13
`define MODE3 5'b01110 //14
`define STALL 5'b01111 //15
`define STALL_WAIT 6'b010000 //16
`define WAIT 6'b010001 //17
`define FINAL_WRITE 6'b010010 //18
`define FINAL_WAIT 6'b010011 //19
`define IDLE 6'b010100 //20
`define LAST_SETUP 6'b010101 //21
`define LAST_SETUP_WAIT 6'b010110 //22
`define LAST 6'b010111 //23
`define LAST_WAIT 6'b011000 //24
// State encodings of the memory-request FSM (cur_mem_state / next_mem_state).
`define MEM_IDLE 1'b0 //0
`define MEM_WRITE 2'b01 //1
`define MEM_WRITE_WAIT 3'b010 //2
`define MEM_CHECK_DONE 3'b011 //3
`define MEM_READ 4'b0100 //4
`define MEM_READ_WAIT 4'b0101 //5
`define MEM_DONE 4'b0110 //6
`define MEM_WAIT_DONE 4'b0111 //7
// MarshallerController
//
// Sequencer for the blocked LU factorization. Two clocked state machines run
// side by side:
//   cur_state     : steps through the block algorithm, raises comp_start and
//                   drives mode for the compute block.
//   cur_mem_state : issues read / write requests to the data transfer unit
//                   through the dtu_* ports.
//
// next_state and next_mem_state are themselves registered on clk, so a
// transition chosen in one cycle is loaded into cur_state / cur_mem_state on
// the following clock edge.
//
// While start is high, cur_state is forced to `START and cur_mem_state to
// `MEM_IDLE, and the problem size (input_N), the DDR base pointers (offset)
// and the block counters are reloaded.
module MarshallerController (clk, start, done, input_N, offset,
    comp_start, block_m, block_n, loop, mode, comp_done, cur_mem_sel, left_mem_sel,
    dtu_write_req, dtu_read_req, dtu_mem_addr, dtu_ram_addr, dtu_size, dtu_ack, dtu_done, left_sel);

    // ---- ports -------------------------------------------------------------
    input                      clk;
    input                      start;
    input  [`NWIDTH-1:0]       input_N;
    input  [`DDRSIZEWIDTH-1:0] offset;
    output                     done;        // high while cur_state == `IDLE

    // for computation section
    input                      comp_done;
    output                     comp_start;
    output [`BLOCKWIDTH-1:0]   block_m, block_n, loop;
    output [1:0]               mode;
    output                     cur_mem_sel, left_mem_sel;

    // for data marshaller section
    input                      dtu_ack, dtu_done;
    output                     dtu_write_req, dtu_read_req;
    output [`DDRSIZEWIDTH-1:0] dtu_mem_addr;
    output [`RAMSIZEWIDTH-1:0] dtu_ram_addr;
    output [`BLOCKWIDTH-1:0]   dtu_size;
    output                     left_sel;

    // ---- state -------------------------------------------------------------
    reg [4:0]                  cur_state, next_state;
    reg [3:0]                  cur_mem_state, next_mem_state;

    reg [`NWIDTH-1:0]          comp_N, N, mcount, ncount, Ndivk, mem_N;
    reg [1:0]                  mode;
    reg [`BLOCKWIDTH-1:0]      block_m, block_n, loop, read_n;
    reg [`BLOCKWIDTH-1:0]      write_n, write_n_buf;
    reg                        left_mem_sel, cur_mem_sel, no_left_switch;

    reg [`RAMSIZEWIDTH-1:0]    ram_addr;
    reg [`DDRSIZEWIDTH-1:0]    mem_addr;
    reg [`DDRSIZEWIDTH-1:0]    mem_base, mem_top, mem_write, mem_left, mem_cur;
    reg [`DDRSIZEWIDTH-1:0]    mem_write_buf;
    reg [`BLOCKWIDTH-1:0]      mem_count;
    reg [1:0]                  mem_read;
    reg [`BLOCKWIDTH-1:0]      mem_write_size, mem_write_size_buf, mem_read_size;

    // ---- shared conditions -------------------------------------------------
    wire                       mem_done;
    wire                       mode_state;     // cur_state is MODE0..MODE3
    wire                       both_done;      // memory side and compute block both finished
    wire                       m_remaining;    // mcount < comp_N
    wire                       n_remaining;    // ncount < comp_N
    wire                       small_rest;     // comp_N <= 2 * BLOCKN
    wire [`DDRSIZEWIDTH-1:0]   base_advance;   // mem_base pointer value loaded in MODE0_SETUP
    wire [`DDRSIZEWIDTH-1:0]   top_advance;    // mem_top pointer value loaded in MODE2_SETUP

    assign mem_done     = (cur_mem_state == `MEM_DONE) && (dtu_done == 1'b1);
    assign mode_state   = (cur_state == `MODE0) || (cur_state == `MODE1) ||
                          (cur_state == `MODE2) || (cur_state == `MODE3);
    assign both_done    = mem_done && comp_done;
    assign m_remaining  = (mcount < comp_N);
    assign n_remaining  = (ncount < comp_N);
    assign small_rest   = (comp_N <= `BLOCKN + `BLOCKN);
    assign base_advance = mem_base + mem_N+`MEMBLOCKN;
    assign top_advance  = mem_top + mem_N;

    // ---- outputs derived directly from state -------------------------------
    assign done          = (cur_state == `IDLE);
    assign comp_start    = mode_state || (cur_state == `FIRST) || (cur_state == `LAST);

    assign dtu_write_req = (cur_mem_state == `MEM_WRITE);
    assign dtu_read_req  = (cur_mem_state == `MEM_READ);
    assign dtu_ram_addr  = ram_addr;
    assign dtu_mem_addr  = mem_addr;
    assign dtu_size      = dtu_write_req ? mem_write_size : mem_read_size;

    // left_sel is high during the read phase entered with mem_read == 1.
    assign left_sel      = (mem_read == 2'b01) &&
                           ((cur_mem_state == `MEM_READ) ||
                            (cur_mem_state == `MEM_READ_WAIT) ||
                            (cur_mem_state == `MEM_WAIT_DONE));

    // ---- FSM to produce memory instructions to DTU -------------------------
    always @ (posedge clk)
    begin
        case (cur_mem_state)
            `MEM_IDLE:
                if (cur_state == `START)
                    next_mem_state <= `MEM_CHECK_DONE;
                else
                    next_mem_state <= `MEM_IDLE;

            // Issue one request, then wait for the DTU to acknowledge it.
            `MEM_WRITE:
                next_mem_state <= `MEM_WRITE_WAIT;
            `MEM_READ:
                next_mem_state <= `MEM_READ_WAIT;

            // After an acknowledge either issue the next request or, once
            // mem_count has reached the request count, wait for dtu_done.
            `MEM_WRITE_WAIT:
                if (dtu_ack && (mem_count == write_n))
                    next_mem_state <= `MEM_WAIT_DONE;
                else if (dtu_ack)
                    next_mem_state <= `MEM_WRITE;
                else
                    next_mem_state <= `MEM_WRITE_WAIT;
            `MEM_READ_WAIT:
                if (dtu_ack && (mem_count == read_n))
                    next_mem_state <= `MEM_WAIT_DONE;
                else if (dtu_ack)
                    next_mem_state <= `MEM_READ;
                else
                    next_mem_state <= `MEM_READ_WAIT;

            `MEM_WAIT_DONE:
                if (dtu_done)
                    next_mem_state <= `MEM_CHECK_DONE;
                else
                    next_mem_state <= `MEM_WAIT_DONE;

            // mem_read == 2 here means nothing is left to read.
            `MEM_CHECK_DONE:
                if (mem_read == 2'b10)
                    next_mem_state <= `MEM_DONE;
                else
                    next_mem_state <= `MEM_READ;

            // Idle between phases: a compute-launch state starts a write-back,
            // FIRST goes straight to the read check.
            `MEM_DONE:
                if (mode_state || (cur_state == `FINAL_WRITE) || (cur_state == `LAST_SETUP))
                    next_mem_state <= `MEM_WRITE;
                else if (cur_state == `FIRST)
                    next_mem_state <= `MEM_CHECK_DONE;
                else
                    next_mem_state <= `MEM_DONE;

            default:
                next_mem_state <= `MEM_IDLE;
        endcase
    end

    // ---- address / request counters for the memory FSM ---------------------
    always @ (posedge clk)
    begin
        case (cur_mem_state)
            `MEM_IDLE, `MEM_DONE:
            begin
                // Park on the write-back address and choose how many read
                // phases the next round performs.
                ram_addr  <= 7'b0;
                mem_count <= 7'b0;
                mem_addr  <= mem_write;
                if (next_state == `LAST_WAIT || next_state == `FINAL_WAIT || next_state == `STALL)
                    mem_read <= 2'b00;
                else if (next_state == `MODE0_SETUP || next_state == `SETUP || cur_state == `MODE0 || next_state == `LAST_SETUP_WAIT)
                    mem_read <= 2'b01;
                else
                    mem_read <= 2'b10;
            end

            `MEM_CHECK_DONE:
            begin
                // Select the source of the upcoming read and restart the counters.
                ram_addr  <= 7'b0;
                mem_count <= 7'b0;
                mem_read  <= mem_read - 2'b01;
                if (mem_read == 2'b10)
                begin
                    mem_addr <= mem_left;
                    read_n   <= loop;
                end
                else
                begin
                    mem_addr <= mem_cur;
                    read_n   <= block_n;
                end
            end

            `MEM_WRITE, `MEM_READ:
            begin
                // One request issued: step both addresses and count it.
                ram_addr  <= ram_addr + `BLOCKMDIVK;
                mem_addr  <= mem_addr + Ndivk;
                mem_count <= mem_count + 2'b01;
            end

            default: ;   // every other state holds its values
        endcase
    end

    // ---- FSM to determine the block LU factorization algorithm -------------
    always @ (posedge clk)
    begin
        case (cur_state)
            // single-cycle steps
            `START:           next_state <= `SETUP;
            `SETUP:           next_state <= `WAIT;
            `MODE0_SETUP:     next_state <= `MODE0_WAIT;
            `MODE1_SETUP:     next_state <= `MODE1_WAIT;
            `MODE2_SETUP:     next_state <= `MODE2_WAIT;
            `MODE3_SETUP:     next_state <= `MODE3_WAIT;
            `STALL:           next_state <= `STALL_WAIT;
            `LAST_SETUP:      next_state <= `LAST_SETUP_WAIT;
            `LAST:            next_state <= `LAST_WAIT;
            `FINAL_WRITE:     next_state <= `FINAL_WAIT;

            // states that wait on the memory side only
            `WAIT:
                if (mem_done) next_state <= `FIRST;
                else          next_state <= `WAIT;
            `FINAL_WAIT:
                if (mem_done) next_state <= `IDLE;
                else          next_state <= `FINAL_WAIT;

            // states that wait on both the memory side and the compute block
            `MODE0_WAIT:
                if (both_done) next_state <= `MODE0;
                else           next_state <= `MODE0_WAIT;
            `MODE1_WAIT:
                if (both_done) next_state <= `MODE1;
                else           next_state <= `MODE1_WAIT;
            `MODE2_WAIT:
                if (both_done) next_state <= `MODE2;
                else           next_state <= `MODE2_WAIT;
            `MODE3_WAIT:
                if (both_done) next_state <= `MODE3;
                else           next_state <= `MODE3_WAIT;
            `STALL_WAIT:
                if (both_done) next_state <= `LAST_SETUP;
                else           next_state <= `STALL_WAIT;
            `LAST_SETUP_WAIT:
                if (both_done) next_state <= `LAST;
                else           next_state <= `LAST_SETUP_WAIT;
            `LAST_WAIT:
                if (both_done) next_state <= `FINAL_WRITE;
                else           next_state <= `LAST_WAIT;

            // compute-launch states: choose the next block to process
            `FIRST, `MODE0:
                if (m_remaining)      next_state <= `MODE1_SETUP;
                else if (n_remaining) next_state <= `MODE2_SETUP;
                else                  next_state <= `LAST_WAIT;
            `MODE1:
                if (m_remaining)      next_state <= `MODE1_SETUP;
                else if (n_remaining) next_state <= `MODE2_SETUP;
                else if (small_rest)  next_state <= `STALL;
                else                  next_state <= `MODE0_SETUP;
            `MODE2, `MODE3:
                if (m_remaining)      next_state <= `MODE3_SETUP;
                else if (n_remaining) next_state <= `MODE2_SETUP;
                else if (small_rest)  next_state <= `STALL;
                else                  next_state <= `MODE0_SETUP;

            `IDLE:
                if (start) next_state <= `SETUP;
                else       next_state <= `IDLE;

            default:
                next_state <= `START;
        endcase
    end

    // ---- state registers ---------------------------------------------------
    always @ (posedge clk)
    begin
        if (start)
        begin
            cur_state     <= `START;
            cur_mem_state <= `MEM_IDLE;
        end
        else
        begin
            cur_state     <= next_state;
            cur_mem_state <= next_mem_state;
        end
    end

    // ---- mode output: 1/2/3 in MODE1/MODE2/MODE3, otherwise 0 --------------
    always @ (cur_state)
    begin
        if (cur_state == `MODE1)
            mode = 2'b01;
        else if (cur_state == `MODE2)
            mode = 2'b10;
        else if (cur_state == `MODE3)
            mode = 2'b11;
        else
            mode = 2'b00;
    end

    // ---- problem size ------------------------------------------------------
    // comp_N shrinks by one block each time the FSM is about to enter MODE0.
    always @ (posedge clk)
    begin
        if (start)
        begin
            comp_N <= input_N;
            N      <= input_N;
        end
        else if (next_state == `MODE0)
            comp_N <= comp_N - `BLOCKN;
    end

    // ---- values recomputed every cycle -------------------------------------
    always @ (posedge clk)
    begin
        Ndivk         <= ((N+`BLOCKM-1)>>6)<<5;
        mem_N         <= Ndivk<<6;
        mem_read_size <= `BLOCKMDIVK;
    end

    // ---- external-memory block pointers ------------------------------------
    always @ (posedge clk)
    begin
        if (start)
        begin
            mem_base <= offset;
            mem_top  <= offset;
            mem_left <= offset;
            mem_cur  <= offset;
        end
        else
        begin
            case (cur_state)
                `MODE0_SETUP:
                begin
                    mem_base <= base_advance;
                    mem_top  <= base_advance;
                    mem_cur  <= base_advance;
                    mem_left <= base_advance;
                end
                `MODE1_SETUP:
                    mem_cur  <= mem_cur + `MEMBLOCKM;
                `MODE3_SETUP:
                begin
                    mem_cur  <= mem_cur + `MEMBLOCKM;
                    mem_left <= mem_left + `MEMBLOCKM;
                end
                `MODE2_SETUP:
                begin
                    mem_cur  <= top_advance;
                    mem_top  <= top_advance;
                    mem_left <= mem_base;
                end
                default: ;   // pointers hold
            endcase
        end
    end

    // ---- write-back bookkeeping --------------------------------------------
    // The *_buf registers form a two-deep queue: each time the memory FSM
    // passes through MEM_CHECK_DONE with mem_read == 0, the buffered entry
    // becomes the active write-back descriptor and the current block's
    // address / size / count is captured into the buffer.
    always @ (posedge clk)
    begin
        if (cur_state == `SETUP)
        begin
            mem_write          <= 24'b0;
            mem_write_buf      <= 24'b0;
            mem_write_size     <= `BLOCKMDIVK;
            mem_write_size_buf <= `BLOCKMDIVK;
            write_n            <= block_n;
            write_n_buf        <= block_n;
        end
        else if (cur_mem_state == `MEM_CHECK_DONE && mem_read == 0)
        begin
            mem_write          <= mem_write_buf;
            mem_write_buf      <= mem_cur;
            mem_write_size     <= mem_write_size_buf;
            mem_write_size_buf <= mem_read_size;
            write_n            <= write_n_buf;
            write_n_buf        <= block_n;
        end
    end

    // ---- loop count handed to the compute block ----------------------------
    always @ (posedge clk)
    begin
        if (start)
            loop <= `BLOCKN;
        else if (next_state == `LAST)
            loop <= comp_N[8:0] - `BLOCKN;
    end

    // ---- block position counters -------------------------------------------
    always @ (posedge clk)
    begin
        if (cur_state == `MODE0_SETUP || cur_state == `MODE2_SETUP || start)
            mcount <= `BLOCKM;
        else if (cur_state == `MODE1_SETUP || cur_state == `MODE3_SETUP)
            mcount <= mcount+`BLOCKM;
    end

    always @ (posedge clk)
    begin
        if (cur_state == `MODE0_SETUP || start)
            ncount <= `BLOCKN;
        else if (cur_state == `MODE2_SETUP)
            ncount <= ncount+`BLOCKN;
    end

    // ---- block dimensions: a full block, or the remainder at the edge ------
    always @ (posedge clk)
    begin
        if (m_remaining)
            block_m <= `BLOCKM;
        else
            block_m <= comp_N - mcount + `BLOCKM;
    end

    always @ (posedge clk)
    begin
        if (n_remaining)
            block_n <= `BLOCKN;
        else
            block_n <= comp_N - ncount + `BLOCKN;
    end

    // ---- RAM bank selects --------------------------------------------------
    // cur_mem_sel toggles in every compute-launch / write-back state.
    always @ (posedge clk)
    begin
        if (start)
            cur_mem_sel <= 1'b0;
        else if (mode_state || (cur_state == `FIRST) || (cur_state == `FINAL_WRITE) ||
                 (cur_state == `LAST_SETUP) || (cur_state == `LAST))
            cur_mem_sel <= ~cur_mem_sel;
    end

    // no_left_switch is set by MODE0 / FIRST and cleared by the other
    // launch states; while set it suppresses the next left_mem_sel toggle.
    always @ (posedge clk)
    begin
        if (start)
            no_left_switch <= 1'b0;
        else if ((cur_state == `MODE0) || (cur_state == `FIRST))
            no_left_switch <= 1'b1;
        else if ((cur_state == `MODE1) || (cur_state == `MODE2) || (cur_state == `MODE3) ||
                 (cur_state == `FINAL_WRITE) || (cur_state == `LAST_SETUP))
            no_left_switch <= 1'b0;
    end

    always @ (posedge clk)
    begin
        if (start)
            left_mem_sel <= 1'b0;
        else if ((mode_state || (cur_state == `FIRST) || (cur_state == `FINAL_WRITE) ||
                  (cur_state == `LAST_SETUP)) && !no_left_switch)
            left_mem_sel <= ~left_mem_sel;
    end

endmodule
//topoutputdelay = 1
//auto-generated LU.v
//datapath for computing LU factorization
//by Wei Zhang
// rRAMSIZEWIDTH = 7 (plain decimal, unlike the sized literals below).
// Not referenced in the visible part of the file.
`define rRAMSIZEWIDTH 7
// 4-bit state encodings with a "c" prefix. Not referenced in the visible
// part of the file; presumably the states of the LUControl FSM -- TODO confirm.
`define cSETUP 4'b0000
`define cSTART 4'b0001
`define cFETCH_COL 4'b0010
`define cWAIT_COL 4'b0011
`define cFIND_REC 4'b0100
`define cMULT_COL 4'b0101
`define cUPDATE_J 4'b0110
`define cSTORE_MO 4'b0111
`define cMULT_SUB 4'b1000
`define cINCRE_I 4'b1001
`define cWAIT 4'b1010
`define cDONE 4'b1011
`define cSTORE_DIAG 4'b1100
`define cSTORE_DIAG2 4'b1101
`define cSTART_FETCH_ROW 4'b1110
// 2-bit state encodings; presumably a separate row-fetch FSM in LUControl
// -- TODO confirm.
`define cROW_WAIT 2'b00
`define cFETCH_ROW 2'b01
`define cDONE_FETCH_ROW 2'b10
`define cLOAD_ROW_INC_J 2'b11
// PRECISION = 32: bit width of each operand / result of the processing elements.
`define PRECISION 7'b0100000
// NUMPE = 32; the LU module instantiates PE0..PE31.
`define NUMPE 7'b0100000
// PEWIDTH = 5: bit width of topWriteSel, which picks one of 32 words.
`define PEWIDTH 4'b0101
// Widths repeated from the top-level header with identical values
// (7, 1024, 128 and 7 respectively).
`define BLOCKWIDTH 4'b0111
`define RAMWIDTH 12'b010000000000
`define RAMNUMBYTES 9'b010000000
`define RAMSIZEWIDTH 4'b0111
// TOPSIZEWIDTH = 12: address width of the top-block RAM.
`define TOPSIZEWIDTH 5'b01100
// Delay constants (3, 1, 2 and 1). Not referenced in the visible part of
// the file; presumably the register-stage counts on the top-RAM and
// block-RAM input/output paths -- TODO confirm.
`define TOPINPUTDELAY 3'b011
`define TOPOUTPUTDELAY 2'b01
`define MEMINPUTDELAY 3'b010
`define MEMOUTPUTDELAY 2'b01
// TOPWIDTH = 32, repeated from the top-level header.
`define TOPWIDTH 7'b0100000
module LU (clk, start, m, n, loop, mode, done,
curReadAddrMem, curReadDataMem, curWriteByteEnMem, curWriteDataMem, curWriteAddrMem, curWriteEnMem, curMemSel,
leftWriteByteEnMem, leftWriteDataMem, leftWriteAddrMem, leftWriteEnMem, leftMemSel
);
input clk, start;
input[`BLOCKWIDTH-1:0] m, n, loop;
input[1:0] mode;
output done;
wire[`RAMWIDTH-1:0] curWriteData0, curWriteData1;
wire[`RAMSIZEWIDTH-1:0] curWriteAddr0, curReadAddr0, curWriteAddr1, curReadAddr1;
wire[`RAMWIDTH-1:0] curReadData0, curReadData1;
wire[`RAMNUMBYTES-1:0] curWriteByteEn0, curWriteByteEn1;
wire curWriteEn0, curWriteEn1;
input[`RAMWIDTH-1:0] curWriteDataMem;
output[`RAMWIDTH-1:0] curReadDataMem;
input[`RAMSIZEWIDTH-1:0] curWriteAddrMem, curReadAddrMem;
input[`RAMNUMBYTES-1:0] curWriteByteEnMem;
input curWriteEnMem;
input[`RAMWIDTH-1:0] leftWriteDataMem;
input[`RAMSIZEWIDTH-1:0] leftWriteAddrMem;
input[`RAMNUMBYTES-1:0] leftWriteByteEnMem;
input leftWriteEnMem;
input leftMemSel, curMemSel;
wire[`RAMWIDTH-1:0] curReadDataLU, curReadDataMem;
wire[`RAMWIDTH-1:0] curWriteDataLU, curWriteDataMem;
wire[`RAMSIZEWIDTH-1:0] curWriteAddrLU, curWriteAddrMem, curReadAddrLU, curReadAddrMem;
wire[`RAMNUMBYTES-1:0] curWriteByteEnLU, curWriteByteEnMem;
wire curWriteEnLU, curWriteEnMem;
reg[`RAMWIDTH-1:0] curReadData0Reg0;
reg[`RAMWIDTH-1:0] curReadData1Reg0;
reg[`RAMWIDTH-1:0] leftReadData0Reg0;
reg[`RAMWIDTH-1:0] leftReadData1Reg0;
reg[`RAMWIDTH-1:0] curWriteData0Reg0;
reg[`RAMWIDTH-1:0] curWriteData0Reg1;
reg[`RAMWIDTH-1:0] curWriteData1Reg0;
reg[`RAMWIDTH-1:0] curWriteData1Reg1;
reg[`RAMSIZEWIDTH-1:0] curWriteAddr0Reg0;
reg[`RAMSIZEWIDTH-1:0] curWriteAddr0Reg1;
reg[`RAMSIZEWIDTH-1:0] curReadAddr0Reg0;
reg[`RAMSIZEWIDTH-1:0] curReadAddr0Reg1;
reg[`RAMSIZEWIDTH-1:0] curWriteAddr1Reg0;
reg[`RAMSIZEWIDTH-1:0] curWriteAddr1Reg1;
reg[`RAMSIZEWIDTH-1:0] curReadAddr1Reg0;
reg[`RAMSIZEWIDTH-1:0] curReadAddr1Reg1;
reg[`RAMNUMBYTES-1:0] curWriteByteEn0Reg0;
reg[`RAMNUMBYTES-1:0] curWriteByteEn0Reg1;
reg[`RAMNUMBYTES-1:0] curWriteByteEn1Reg0;
reg[`RAMNUMBYTES-1:0] curWriteByteEn1Reg1;
reg curWriteEn0Reg0;
reg curWriteEn0Reg1;
reg curWriteEn1Reg0;
reg curWriteEn1Reg1;
reg[`RAMWIDTH-1:0] leftWriteData0Reg0;
reg[`RAMWIDTH-1:0] leftWriteData0Reg1;
reg[`RAMWIDTH-1:0] leftWriteData1Reg0;
reg[`RAMWIDTH-1:0] leftWriteData1Reg1;
reg[`RAMSIZEWIDTH-1:0] leftWriteAddr0Reg0;
reg[`RAMSIZEWIDTH-1:0] leftWriteAddr0Reg1;
reg[`RAMSIZEWIDTH-1:0] leftReadAddr0Reg0;
reg[`RAMSIZEWIDTH-1:0] leftReadAddr0Reg1;
reg[`RAMSIZEWIDTH-1:0] leftWriteAddr1Reg0;
reg[`RAMSIZEWIDTH-1:0] leftWriteAddr1Reg1;
reg[`RAMSIZEWIDTH-1:0] leftReadAddr1Reg0;
reg[`RAMSIZEWIDTH-1:0] leftReadAddr1Reg1;
reg[`RAMNUMBYTES-1:0] leftWriteByteEn0Reg0;
reg[`RAMNUMBYTES-1:0] leftWriteByteEn0Reg1;
reg[`RAMNUMBYTES-1:0] leftWriteByteEn1Reg0;
reg[`RAMNUMBYTES-1:0] leftWriteByteEn1Reg1;
reg leftWriteEn0Reg0;
reg leftWriteEn0Reg1;
reg leftWriteEn1Reg0;
reg leftWriteEn1Reg1;
reg[`PRECISION-1:0] multOperand;
reg[`PRECISION-1:0] diag;
wire[`PRECISION-1:0] recResult;
wire[`PRECISION-1:0] multA0;
wire[`PRECISION-1:0] multA1;
wire[`PRECISION-1:0] multA2;
wire[`PRECISION-1:0] multA3;
wire[`PRECISION-1:0] multA4;
wire[`PRECISION-1:0] multA5;
wire[`PRECISION-1:0] multA6;
wire[`PRECISION-1:0] multA7;
wire[`PRECISION-1:0] multA8;
wire[`PRECISION-1:0] multA9;
wire[`PRECISION-1:0] multA10;
wire[`PRECISION-1:0] multA11;
wire[`PRECISION-1:0] multA12;
wire[`PRECISION-1:0] multA13;
wire[`PRECISION-1:0] multA14;
wire[`PRECISION-1:0] multA15;
wire[`PRECISION-1:0] multA16;
wire[`PRECISION-1:0] multA17;
wire[`PRECISION-1:0] multA18;
wire[`PRECISION-1:0] multA19;
wire[`PRECISION-1:0] multA20;
wire[`PRECISION-1:0] multA21;
wire[`PRECISION-1:0] multA22;
wire[`PRECISION-1:0] multA23;
wire[`PRECISION-1:0] multA24;
wire[`PRECISION-1:0] multA25;
wire[`PRECISION-1:0] multA26;
wire[`PRECISION-1:0] multA27;
wire[`PRECISION-1:0] multA28;
wire[`PRECISION-1:0] multA29;
wire[`PRECISION-1:0] multA30;
wire[`PRECISION-1:0] multA31;
wire[`PRECISION-1:0] multResult0;
wire[`PRECISION-1:0] multResult1;
wire[`PRECISION-1:0] multResult2;
wire[`PRECISION-1:0] multResult3;
wire[`PRECISION-1:0] multResult4;
wire[`PRECISION-1:0] multResult5;
wire[`PRECISION-1:0] multResult6;
wire[`PRECISION-1:0] multResult7;
wire[`PRECISION-1:0] multResult8;
wire[`PRECISION-1:0] multResult9;
wire[`PRECISION-1:0] multResult10;
wire[`PRECISION-1:0] multResult11;
wire[`PRECISION-1:0] multResult12;
wire[`PRECISION-1:0] multResult13;
wire[`PRECISION-1:0] multResult14;
wire[`PRECISION-1:0] multResult15;
wire[`PRECISION-1:0] multResult16;
wire[`PRECISION-1:0] multResult17;
wire[`PRECISION-1:0] multResult18;
wire[`PRECISION-1:0] multResult19;
wire[`PRECISION-1:0] multResult20;
wire[`PRECISION-1:0] multResult21;
wire[`PRECISION-1:0] multResult22;
wire[`PRECISION-1:0] multResult23;
wire[`PRECISION-1:0] multResult24;
wire[`PRECISION-1:0] multResult25;
wire[`PRECISION-1:0] multResult26;
wire[`PRECISION-1:0] multResult27;
wire[`PRECISION-1:0] multResult28;
wire[`PRECISION-1:0] multResult29;
wire[`PRECISION-1:0] multResult30;
wire[`PRECISION-1:0] multResult31;
wire[`PRECISION-1:0] addA0;
wire[`PRECISION-1:0] addA1;
wire[`PRECISION-1:0] addA2;
wire[`PRECISION-1:0] addA3;
wire[`PRECISION-1:0] addA4;
wire[`PRECISION-1:0] addA5;
wire[`PRECISION-1:0] addA6;
wire[`PRECISION-1:0] addA7;
wire[`PRECISION-1:0] addA8;
wire[`PRECISION-1:0] addA9;
wire[`PRECISION-1:0] addA10;
wire[`PRECISION-1:0] addA11;
wire[`PRECISION-1:0] addA12;
wire[`PRECISION-1:0] addA13;
wire[`PRECISION-1:0] addA14;
wire[`PRECISION-1:0] addA15;
wire[`PRECISION-1:0] addA16;
wire[`PRECISION-1:0] addA17;
wire[`PRECISION-1:0] addA18;
wire[`PRECISION-1:0] addA19;
wire[`PRECISION-1:0] addA20;
wire[`PRECISION-1:0] addA21;
wire[`PRECISION-1:0] addA22;
wire[`PRECISION-1:0] addA23;
wire[`PRECISION-1:0] addA24;
wire[`PRECISION-1:0] addA25;
wire[`PRECISION-1:0] addA26;
wire[`PRECISION-1:0] addA27;
wire[`PRECISION-1:0] addA28;
wire[`PRECISION-1:0] addA29;
wire[`PRECISION-1:0] addA30;
wire[`PRECISION-1:0] addA31;
wire[`PRECISION-1:0] addResult0;
wire[`PRECISION-1:0] addResult1;
wire[`PRECISION-1:0] addResult2;
wire[`PRECISION-1:0] addResult3;
wire[`PRECISION-1:0] addResult4;
wire[`PRECISION-1:0] addResult5;
wire[`PRECISION-1:0] addResult6;
wire[`PRECISION-1:0] addResult7;
wire[`PRECISION-1:0] addResult8;
wire[`PRECISION-1:0] addResult9;
wire[`PRECISION-1:0] addResult10;
wire[`PRECISION-1:0] addResult11;
wire[`PRECISION-1:0] addResult12;
wire[`PRECISION-1:0] addResult13;
wire[`PRECISION-1:0] addResult14;
wire[`PRECISION-1:0] addResult15;
wire[`PRECISION-1:0] addResult16;
wire[`PRECISION-1:0] addResult17;
wire[`PRECISION-1:0] addResult18;
wire[`PRECISION-1:0] addResult19;
wire[`PRECISION-1:0] addResult20;
wire[`PRECISION-1:0] addResult21;
wire[`PRECISION-1:0] addResult22;
wire[`PRECISION-1:0] addResult23;
wire[`PRECISION-1:0] addResult24;
wire[`PRECISION-1:0] addResult25;
wire[`PRECISION-1:0] addResult26;
wire[`PRECISION-1:0] addResult27;
wire[`PRECISION-1:0] addResult28;
wire[`PRECISION-1:0] addResult29;
wire[`PRECISION-1:0] addResult30;
wire[`PRECISION-1:0] addResult31;
wire[`RAMWIDTH-1:0] leftReadData0, leftReadData1, leftWriteData0, leftWriteData1;
wire[`RAMSIZEWIDTH-1:0] leftWriteAddr0, leftWriteAddr1, leftReadAddr0, leftReadAddr1;
wire[`RAMNUMBYTES-1:0] leftWriteByteEn0, leftWriteByteEn1;
wire leftWriteEn0, leftWriteEn1;
wire[`RAMWIDTH-1:0] leftReadDataLU, leftWriteDataLU, leftWriteDataMem;
wire[`RAMSIZEWIDTH-1:0] leftWriteAddrLU, leftWriteAddrMem, leftReadAddrLU;
wire[`RAMNUMBYTES-1:0] leftWriteByteEnLU, leftWriteByteEnMem;
wire leftWriteEnLU, leftWriteEnMem;
wire[`PRECISION-1:0] topWriteData;
reg[`PRECISION-1:0] topWriteDataLU;
wire[`PRECISION-1:0] topReadData, topReadDataLU;
wire[`TOPSIZEWIDTH-1:0] topWriteAddr, topWriteAddrLU, topReadAddr, topReadAddrLU;
wire topWriteEn, topWriteEnLU;
reg[`PRECISION-1:0] topReadDataReg0;
reg[`PRECISION-1:0] topWriteDataReg0;
reg[`PRECISION-1:0] topWriteDataReg1;
reg[`PRECISION-1:0] topWriteDataReg2;
reg[`TOPSIZEWIDTH-1:0] topWriteAddrReg0;
reg[`TOPSIZEWIDTH-1:0] topWriteAddrReg1;
reg[`TOPSIZEWIDTH-1:0] topWriteAddrReg2;
reg[`TOPSIZEWIDTH-1:0] topReadAddrReg0;
reg[`TOPSIZEWIDTH-1:0] topReadAddrReg1;
reg[`TOPSIZEWIDTH-1:0] topReadAddrReg2;
reg topWriteEnReg0;
reg topWriteEnReg1;
reg topWriteEnReg2;
wire[`RAMWIDTH-1:0] rcWriteData;
wire leftWriteSel, curWriteSel, topSourceSel;
wire diagEn;
wire[`PEWIDTH-1:0] topWriteSel;
wire MOSel;
wire MOEn;
// control block
LUControl conBlock (clk, start, m, n, loop, mode, done,
curReadAddrLU, curWriteAddrLU, curWriteByteEnLU, curWriteEnLU, curWriteSel,
leftReadAddrLU, leftWriteAddrLU, leftWriteByteEnLU, leftWriteEnLU, leftWriteSel,
topReadAddrLU, topWriteAddrLU, topWriteEnLU, topWriteSel, topSourceSel, diagEn, MOSel, MOEn);
// fp_div unit
//floating point divider here
fpu_div rec(.clock(clk), .n(32'h3F800000), .d(diag), .div(recResult));
// on-chip memory blocks that store the matrix to be LU factorized
// store current blocks data
ram currentBlock0 (curWriteByteEn0, clk, curWriteData0, curReadAddr0, curWriteAddr0, curWriteEn0, curReadData0 );
ram1 currentBlock1 (curWriteByteEn1, clk, curWriteData1, curReadAddr1, curWriteAddr1, curWriteEn1, curReadData1 );
// store left blocks data
ram2 leftBlock0(leftWriteByteEn0, clk, leftWriteData0, leftReadAddr0, leftWriteAddr0, leftWriteEn0, leftReadData0 );
ram3 leftBlock1(leftWriteByteEn1, clk, leftWriteData1, leftReadAddr1, leftWriteAddr1, leftWriteEn1, leftReadData1 );
// store top block data
top_ram topBlock(clk, topWriteData, topReadAddr, topWriteAddr, topWriteEn, topReadDataLU );
// processing elements that does the main computation of LU factorization
mult_add PE0 (clk, multA0, multOperand, addA0, multResult0, addResult0);
mult_add PE1 (clk, multA1, multOperand, addA1, multResult1, addResult1);
mult_add PE2 (clk, multA2, multOperand, addA2, multResult2, addResult2);
mult_add PE3 (clk, multA3, multOperand, addA3, multResult3, addResult3);
mult_add PE4 (clk, multA4, multOperand, addA4, multResult4, addResult4);
mult_add PE5 (clk, multA5, multOperand, addA5, multResult5, addResult5);
mult_add PE6 (clk, multA6, multOperand, addA6, multResult6, addResult6);
mult_add PE7 (clk, multA7, multOperand, addA7, multResult7, addResult7);
mult_add PE8 (clk, multA8, multOperand, addA8, multResult8, addResult8);
mult_add PE9 (clk, multA9, multOperand, addA9, multResult9, addResult9);
mult_add PE10 (clk, multA10, multOperand, addA10, multResult10, addResult10);
mult_add PE11 (clk, multA11, multOperand, addA11, multResult11, addResult11);
mult_add PE12 (clk, multA12, multOperand, addA12, multResult12, addResult12);
mult_add PE13 (clk, multA13, multOperand, addA13, multResult13, addResult13);
mult_add PE14 (clk, multA14, multOperand, addA14, multResult14, addResult14);
mult_add PE15 (clk, multA15, multOperand, addA15, multResult15, addResult15);
mult_add PE16 (clk, multA16, multOperand, addA16, multResult16, addResult16);
mult_add PE17 (clk, multA17, multOperand, addA17, multResult17, addResult17);
mult_add PE18 (clk, multA18, multOperand, addA18, multResult18, addResult18);
mult_add PE19 (clk, multA19, multOperand, addA19, multResult19, addResult19);
mult_add PE20 (clk, multA20, multOperand, addA20, multResult20, addResult20);
mult_add PE21 (clk, multA21, multOperand, addA21, multResult21, addResult21);
mult_add PE22 (clk, multA22, multOperand, addA22, multResult22, addResult22);
mult_add PE23 (clk, multA23, multOperand, addA23, multResult23, addResult23);
mult_add PE24 (clk, multA24, multOperand, addA24, multResult24, addResult24);
mult_add PE25 (clk, multA25, multOperand, addA25, multResult25, addResult25);
mult_add PE26 (clk, multA26, multOperand, addA26, multResult26, addResult26);
mult_add PE27 (clk, multA27, multOperand, addA27, multResult27, addResult27);
mult_add PE28 (clk, multA28, multOperand, addA28, multResult28, addResult28);
mult_add PE29 (clk, multA29, multOperand, addA29, multResult29, addResult29);
mult_add PE30 (clk, multA30, multOperand, addA30, multResult30, addResult30);
mult_add PE31 (clk, multA31, multOperand, addA31, multResult31, addResult31);
// connect to ports of the left blocks
assign leftWriteDataLU = (leftWriteSel == 1'b0) ? curReadDataLU : rcWriteData;
always @ (posedge clk)
begin
if(leftMemSel == 1'b0)
begin
leftWriteData0Reg0 <= leftWriteDataMem;
leftWriteAddr0Reg0 <= leftWriteAddrMem;
leftWriteByteEn0Reg0 <= leftWriteByteEnMem;
leftWriteEn0Reg0 <= leftWriteEnMem;
leftWriteData1Reg0 <= leftWriteDataLU;
leftWriteAddr1Reg0 <= leftWriteAddrLU;
leftWriteByteEn1Reg0 <= leftWriteByteEnLU;
leftWriteEn1Reg0 <= leftWriteEnLU;
end
else
begin
leftWriteData0Reg0 <= leftWriteDataLU;
leftWriteAddr0Reg0 <= leftWriteAddrLU;
leftWriteByteEn0Reg0 <= leftWriteByteEnLU;
leftWriteEn0Reg0 <= leftWriteEnLU;
leftWriteData1Reg0 <= leftWriteDataMem;
leftWriteAddr1Reg0 <= leftWriteAddrMem;
leftWriteByteEn1Reg0 <= leftWriteByteEnMem;
leftWriteEn1Reg0 <= leftWriteEnMem;
end
leftReadAddr0Reg0 <= leftReadAddrLU;
leftReadAddr1Reg0 <= leftReadAddrLU;
leftWriteData0Reg1 <= leftWriteData0Reg0;
leftWriteAddr0Reg1 <= leftWriteAddr0Reg0;
leftReadAddr0Reg1 <= leftReadAddr0Reg0;
leftWriteByteEn0Reg1 <= leftWriteByteEn0Reg0;
leftWriteEn0Reg1 <= leftWriteEn0Reg0;
leftWriteData1Reg1 <= leftWriteData1Reg0;
leftWriteAddr1Reg1 <= leftWriteAddr1Reg0;
leftReadAddr1Reg1 <= leftReadAddr1Reg0;
leftWriteByteEn1Reg1 <= leftWriteByteEn1Reg0;
leftWriteEn1Reg1 <= leftWriteEn1Reg0;
end
assign leftWriteData0 = leftWriteData0Reg1;
assign leftWriteAddr0 = leftWriteAddr0Reg1;
assign leftReadAddr0 = leftReadAddr0Reg1;
assign leftWriteByteEn0 = leftWriteByteEn0Reg1;
assign leftWriteEn0 = leftWriteEn0Reg1;
assign leftWriteData1 = leftWriteData1Reg1;
assign leftWriteAddr1 = leftWriteAddr1Reg1;
assign leftReadAddr1 = leftReadAddr1Reg1;
assign leftWriteByteEn1 = leftWriteByteEn1Reg1;
assign leftWriteEn1 = leftWriteEn1Reg1;
always @ (posedge clk)
begin
leftReadData0Reg0 <= leftReadData0;
leftReadData1Reg0 <= leftReadData1;
end
assign leftReadDataLU = (leftMemSel == 1'b0) ? leftReadData1Reg0 : leftReadData0Reg0;
// data feed to fp div unit
always @ (posedge clk)
begin
if (diagEn == 1'b1)
begin
diag <= topReadData;
end
end
// one of the inputs to the PE
always @ (posedge clk)
begin
if (start == 1'b1)
multOperand <= 0;
else if (MOEn == 1'b1)
begin
if (MOSel == 1'b0)
multOperand <= recResult;
else
multOperand <= topReadData;
end
end
// connections to top block memory ports
// Combinational select of the word written to the top memory.
//   topSourceSel == 0 : a 32-bit slice of curReadDataLU; topWriteSel 0 picks
//                       bits [1023:992], each increment moves 32 bits lower.
//   otherwise         : an adder result; topWriteSel 0 picks addResult31,
//                       each increment moves one index lower.
always @ (topSourceSel or topWriteSel or curReadDataLU
          or addResult31 or addResult30 or addResult29 or addResult28
          or addResult27 or addResult26 or addResult25 or addResult24
          or addResult23 or addResult22 or addResult21 or addResult20
          or addResult19 or addResult18 or addResult17 or addResult16
          or addResult15 or addResult14 or addResult13 or addResult12
          or addResult11 or addResult10 or addResult9 or addResult8
          or addResult7 or addResult6 or addResult5 or addResult4
          or addResult3 or addResult2 or addResult1 or addResult0)
begin
    if (topSourceSel == 1'b0)
    begin
        case (topWriteSel)
            5'd0:    topWriteDataLU = curReadDataLU[1023:992];
            5'd1:    topWriteDataLU = curReadDataLU[991:960];
            5'd2:    topWriteDataLU = curReadDataLU[959:928];
            5'd3:    topWriteDataLU = curReadDataLU[927:896];
            5'd4:    topWriteDataLU = curReadDataLU[895:864];
            5'd5:    topWriteDataLU = curReadDataLU[863:832];
            5'd6:    topWriteDataLU = curReadDataLU[831:800];
            5'd7:    topWriteDataLU = curReadDataLU[799:768];
            5'd8:    topWriteDataLU = curReadDataLU[767:736];
            5'd9:    topWriteDataLU = curReadDataLU[735:704];
            5'd10:   topWriteDataLU = curReadDataLU[703:672];
            5'd11:   topWriteDataLU = curReadDataLU[671:640];
            5'd12:   topWriteDataLU = curReadDataLU[639:608];
            5'd13:   topWriteDataLU = curReadDataLU[607:576];
            5'd14:   topWriteDataLU = curReadDataLU[575:544];
            5'd15:   topWriteDataLU = curReadDataLU[543:512];
            5'd16:   topWriteDataLU = curReadDataLU[511:480];
            5'd17:   topWriteDataLU = curReadDataLU[479:448];
            5'd18:   topWriteDataLU = curReadDataLU[447:416];
            5'd19:   topWriteDataLU = curReadDataLU[415:384];
            5'd20:   topWriteDataLU = curReadDataLU[383:352];
            5'd21:   topWriteDataLU = curReadDataLU[351:320];
            5'd22:   topWriteDataLU = curReadDataLU[319:288];
            5'd23:   topWriteDataLU = curReadDataLU[287:256];
            5'd24:   topWriteDataLU = curReadDataLU[255:224];
            5'd25:   topWriteDataLU = curReadDataLU[223:192];
            5'd26:   topWriteDataLU = curReadDataLU[191:160];
            5'd27:   topWriteDataLU = curReadDataLU[159:128];
            5'd28:   topWriteDataLU = curReadDataLU[127:96];
            5'd29:   topWriteDataLU = curReadDataLU[95:64];
            5'd30:   topWriteDataLU = curReadDataLU[63:32];
            5'd31:   topWriteDataLU = curReadDataLU[31:0];
            default: topWriteDataLU = curReadDataLU[`PRECISION-1:0];
        endcase
    end
    else
    begin
        case (topWriteSel)
            5'd0:    topWriteDataLU = addResult31;
            5'd1:    topWriteDataLU = addResult30;
            5'd2:    topWriteDataLU = addResult29;
            5'd3:    topWriteDataLU = addResult28;
            5'd4:    topWriteDataLU = addResult27;
            5'd5:    topWriteDataLU = addResult26;
            5'd6:    topWriteDataLU = addResult25;
            5'd7:    topWriteDataLU = addResult24;
            5'd8:    topWriteDataLU = addResult23;
            5'd9:    topWriteDataLU = addResult22;
            5'd10:   topWriteDataLU = addResult21;
            5'd11:   topWriteDataLU = addResult20;
            5'd12:   topWriteDataLU = addResult19;
            5'd13:   topWriteDataLU = addResult18;
            5'd14:   topWriteDataLU = addResult17;
            5'd15:   topWriteDataLU = addResult16;
            5'd16:   topWriteDataLU = addResult15;
            5'd17:   topWriteDataLU = addResult14;
            5'd18:   topWriteDataLU = addResult13;
            5'd19:   topWriteDataLU = addResult12;
            5'd20:   topWriteDataLU = addResult11;
            5'd21:   topWriteDataLU = addResult10;
            5'd22:   topWriteDataLU = addResult9;
            5'd23:   topWriteDataLU = addResult8;
            5'd24:   topWriteDataLU = addResult7;
            5'd25:   topWriteDataLU = addResult6;
            5'd26:   topWriteDataLU = addResult5;
            5'd27:   topWriteDataLU = addResult4;
            5'd28:   topWriteDataLU = addResult3;
            5'd29:   topWriteDataLU = addResult2;
            5'd30:   topWriteDataLU = addResult1;
            5'd31:   topWriteDataLU = addResult0;
            default: topWriteDataLU = addResult0;
        endcase
    end
end
// Three register stages (Reg0 -> Reg1 -> Reg2) between the LU-side top-memory
// signals and the memory ports. All assignments are non-blocking, so every
// stage samples the previous stage's value from before the clock edge.
always @ (posedge clk)
begin
    // write data
    topWriteDataReg0 <= topWriteDataLU;
    topWriteDataReg1 <= topWriteDataReg0;
    topWriteDataReg2 <= topWriteDataReg1;
    // write address
    topWriteAddrReg0 <= topWriteAddrLU;
    topWriteAddrReg1 <= topWriteAddrReg0;
    topWriteAddrReg2 <= topWriteAddrReg1;
    // read address
    topReadAddrReg0  <= topReadAddrLU;
    topReadAddrReg1  <= topReadAddrReg0;
    topReadAddrReg2  <= topReadAddrReg1;
    // write enable
    topWriteEnReg0   <= topWriteEnLU;
    topWriteEnReg1   <= topWriteEnReg0;
    topWriteEnReg2   <= topWriteEnReg1;
end
// The memory ports are driven by the last stage.
assign topWriteData = topWriteDataReg2;
assign topWriteAddr = topWriteAddrReg2;
assign topReadAddr  = topReadAddrReg2;
assign topWriteEn   = topWriteEnReg2;
// Read data returning from the top memory is registered once.
always @ (posedge clk)
    topReadDataReg0 <= topReadDataLU;
assign topReadData = topReadDataReg0;
// connections to processing element
// Lane k takes bits [32k+31:32k]: multA<k> from leftReadDataLU and
// addA<k> from curReadDataLU.
assign multA0  = leftReadDataLU[31:0];
assign addA0   = curReadDataLU[31:0];
assign multA1  = leftReadDataLU[63:32];
assign addA1   = curReadDataLU[63:32];
assign multA2  = leftReadDataLU[95:64];
assign addA2   = curReadDataLU[95:64];
assign multA3  = leftReadDataLU[127:96];
assign addA3   = curReadDataLU[127:96];
assign multA4  = leftReadDataLU[159:128];
assign addA4   = curReadDataLU[159:128];
assign multA5  = leftReadDataLU[191:160];
assign addA5   = curReadDataLU[191:160];
assign multA6  = leftReadDataLU[223:192];
assign addA6   = curReadDataLU[223:192];
assign multA7  = leftReadDataLU[255:224];
assign addA7   = curReadDataLU[255:224];
assign multA8  = leftReadDataLU[287:256];
assign addA8   = curReadDataLU[287:256];
assign multA9  = leftReadDataLU[319:288];
assign addA9   = curReadDataLU[319:288];
assign multA10 = leftReadDataLU[351:320];
assign addA10  = curReadDataLU[351:320];
assign multA11 = leftReadDataLU[383:352];
assign addA11  = curReadDataLU[383:352];
assign multA12 = leftReadDataLU[415:384];
assign addA12  = curReadDataLU[415:384];
assign multA13 = leftReadDataLU[447:416];
assign addA13  = curReadDataLU[447:416];
assign multA14 = leftReadDataLU[479:448];
assign addA14  = curReadDataLU[479:448];
assign multA15 = leftReadDataLU[511:480];
assign addA15  = curReadDataLU[511:480];
assign multA16 = leftReadDataLU[543:512];
assign addA16  = curReadDataLU[543:512];
assign multA17 = leftReadDataLU[575:544];
assign addA17  = curReadDataLU[575:544];
assign multA18 = leftReadDataLU[607:576];
assign addA18  = curReadDataLU[607:576];
assign multA19 = leftReadDataLU[639:608];
assign addA19  = curReadDataLU[639:608];
assign multA20 = leftReadDataLU[671:640];
assign addA20  = curReadDataLU[671:640];
assign multA21 = leftReadDataLU[703:672];
assign addA21  = curReadDataLU[703:672];
assign multA22 = leftReadDataLU[735:704];
assign addA22  = curReadDataLU[735:704];
assign multA23 = leftReadDataLU[767:736];
assign addA23  = curReadDataLU[767:736];
assign multA24 = leftReadDataLU[799:768];
assign addA24  = curReadDataLU[799:768];
assign multA25 = leftReadDataLU[831:800];
assign addA25  = curReadDataLU[831:800];
assign multA26 = leftReadDataLU[863:832];
assign addA26  = curReadDataLU[863:832];
assign multA27 = leftReadDataLU[895:864];
assign addA27  = curReadDataLU[895:864];
assign multA28 = leftReadDataLU[927:896];
assign addA28  = curReadDataLU[927:896];
assign multA29 = leftReadDataLU[959:928];
assign addA29  = curReadDataLU[959:928];
assign multA30 = leftReadDataLU[991:960];
assign addA30  = curReadDataLU[991:960];
assign multA31 = leftReadDataLU[1023:992];
assign addA31  = curReadDataLU[1023:992];
// connections to ports of the current blocks
// Per-lane write-back select: curWriteSel == 0 forwards the multiplier result,
// any other value forwards the adder result. Lane k occupies bits [32k+31:32k].
assign rcWriteData[31:0]     = (curWriteSel != 0) ? addResult0  : multResult0;
assign rcWriteData[63:32]    = (curWriteSel != 0) ? addResult1  : multResult1;
assign rcWriteData[95:64]    = (curWriteSel != 0) ? addResult2  : multResult2;
assign rcWriteData[127:96]   = (curWriteSel != 0) ? addResult3  : multResult3;
assign rcWriteData[159:128]  = (curWriteSel != 0) ? addResult4  : multResult4;
assign rcWriteData[191:160]  = (curWriteSel != 0) ? addResult5  : multResult5;
assign rcWriteData[223:192]  = (curWriteSel != 0) ? addResult6  : multResult6;
assign rcWriteData[255:224]  = (curWriteSel != 0) ? addResult7  : multResult7;
assign rcWriteData[287:256]  = (curWriteSel != 0) ? addResult8  : multResult8;
assign rcWriteData[319:288]  = (curWriteSel != 0) ? addResult9  : multResult9;
assign rcWriteData[351:320]  = (curWriteSel != 0) ? addResult10 : multResult10;
assign rcWriteData[383:352]  = (curWriteSel != 0) ? addResult11 : multResult11;
assign rcWriteData[415:384]  = (curWriteSel != 0) ? addResult12 : multResult12;
assign rcWriteData[447:416]  = (curWriteSel != 0) ? addResult13 : multResult13;
assign rcWriteData[479:448]  = (curWriteSel != 0) ? addResult14 : multResult14;
assign rcWriteData[511:480]  = (curWriteSel != 0) ? addResult15 : multResult15;
assign rcWriteData[543:512]  = (curWriteSel != 0) ? addResult16 : multResult16;
assign rcWriteData[575:544]  = (curWriteSel != 0) ? addResult17 : multResult17;
assign rcWriteData[607:576]  = (curWriteSel != 0) ? addResult18 : multResult18;
assign rcWriteData[639:608]  = (curWriteSel != 0) ? addResult19 : multResult19;
assign rcWriteData[671:640]  = (curWriteSel != 0) ? addResult20 : multResult20;
assign rcWriteData[703:672]  = (curWriteSel != 0) ? addResult21 : multResult21;
assign rcWriteData[735:704]  = (curWriteSel != 0) ? addResult22 : multResult22;
assign rcWriteData[767:736]  = (curWriteSel != 0) ? addResult23 : multResult23;
assign rcWriteData[799:768]  = (curWriteSel != 0) ? addResult24 : multResult24;
assign rcWriteData[831:800]  = (curWriteSel != 0) ? addResult25 : multResult25;
assign rcWriteData[863:832]  = (curWriteSel != 0) ? addResult26 : multResult26;
assign rcWriteData[895:864]  = (curWriteSel != 0) ? addResult27 : multResult27;
assign rcWriteData[927:896]  = (curWriteSel != 0) ? addResult28 : multResult28;
assign rcWriteData[959:928]  = (curWriteSel != 0) ? addResult29 : multResult29;
assign rcWriteData[991:960]  = (curWriteSel != 0) ? addResult30 : multResult30;
assign rcWriteData[1023:992] = (curWriteSel != 0) ? addResult31 : multResult31;
// The assembled word is what the LU side writes to the current block.
assign curWriteDataLU = rcWriteData;
// Two-stage pipeline that steers the "Mem" and "LU" request signals onto the
// two current-block memory ports. curMemSel == 0 gives port 0 to the Mem side
// and port 1 to the LU side; any other value swaps them.
always @ (posedge clk)
begin
    // Stage 2 (non-blocking: takes the stage-1 values from before this edge).
    curWriteData0Reg1   <= curWriteData0Reg0;
    curWriteData1Reg1   <= curWriteData1Reg0;
    curWriteAddr0Reg1   <= curWriteAddr0Reg0;
    curWriteAddr1Reg1   <= curWriteAddr1Reg0;
    curReadAddr0Reg1    <= curReadAddr0Reg0;
    curReadAddr1Reg1    <= curReadAddr1Reg0;
    curWriteByteEn0Reg1 <= curWriteByteEn0Reg0;
    curWriteByteEn1Reg1 <= curWriteByteEn1Reg0;
    curWriteEn0Reg1     <= curWriteEn0Reg0;
    curWriteEn1Reg1     <= curWriteEn1Reg0;

    // Stage 1: port steering.
    if(curMemSel == 1'b0)
    begin
        // port 0 <- Mem side
        curWriteData0Reg0   <= curWriteDataMem;
        curWriteAddr0Reg0   <= curWriteAddrMem;
        curReadAddr0Reg0    <= curReadAddrMem;
        curWriteByteEn0Reg0 <= curWriteByteEnMem;
        curWriteEn0Reg0     <= curWriteEnMem;
        // port 1 <- LU side
        curWriteData1Reg0   <= curWriteDataLU;
        curWriteAddr1Reg0   <= curWriteAddrLU;
        curReadAddr1Reg0    <= curReadAddrLU;
        curWriteByteEn1Reg0 <= curWriteByteEnLU;
        curWriteEn1Reg0     <= curWriteEnLU;
    end
    else
    begin
        // port 0 <- LU side
        curWriteData0Reg0   <= curWriteDataLU;
        curWriteAddr0Reg0   <= curWriteAddrLU;
        curReadAddr0Reg0    <= curReadAddrLU;
        curWriteByteEn0Reg0 <= curWriteByteEnLU;
        curWriteEn0Reg0     <= curWriteEnLU;
        // port 1 <- Mem side
        curWriteData1Reg0   <= curWriteDataMem;
        curWriteAddr1Reg0   <= curWriteAddrMem;
        curReadAddr1Reg0    <= curReadAddrMem;
        curWriteByteEn1Reg0 <= curWriteByteEnMem;
        curWriteEn1Reg0     <= curWriteEnMem;
    end
end
// Memory ports are driven by stage 2.
assign curWriteData0   = curWriteData0Reg1;
assign curWriteData1   = curWriteData1Reg1;
assign curWriteAddr0   = curWriteAddr0Reg1;
assign curWriteAddr1   = curWriteAddr1Reg1;
assign curReadAddr0    = curReadAddr0Reg1;
assign curReadAddr1    = curReadAddr1Reg1;
assign curWriteByteEn0 = curWriteByteEn0Reg1;
assign curWriteByteEn1 = curWriteByteEn1Reg1;
assign curWriteEn0     = curWriteEn0Reg1;
assign curWriteEn1     = curWriteEn1Reg1;
// Register the read data of both current-block memories.
always @ (posedge clk)
begin
    curReadData1Reg0 <= curReadData1;
    curReadData0Reg0 <= curReadData0;
end
// With curMemSel == 0 the Mem side reads memory 0 and the LU side memory 1;
// any other value swaps the two.
assign curReadDataLU  = (curMemSel != 0) ? curReadData0Reg0 : curReadData1Reg0;
assign curReadDataMem = (curMemSel != 0) ? curReadData1Reg0 : curReadData0Reg0;
endmodule
// LUControl: clocked controller built around two state machines
// (currentState / currentRowState). It produces the address, enable and
// select signals for the "cur", "left" and "top" memories, plus done, diagEn,
// MOSel and MOEn. All outputs are re-declared as reg further down.
module LUControl (clk, start_in, m_in, n_in, loop_in, mode_in, done,
curReadAddr, curWriteAddr, curWriteByteEn, curWriteEn, curWriteSel,
leftReadAddr, leftWriteAddr, leftWriteByteEn, leftWriteEn, leftWriteSel,
topReadAddr, topWriteAddr, topWriteEn, topWriteSel, topSourceSel, diagEn, MOSel, MOEn);
// start_in forces the main FSM to `cSETUP and latches the parameters below.
input clk, start_in;
// Sampled into m, n and loop on clock edges where start_in is 1.
input[7-1:0] m_in, n_in, loop_in;
// Sampled into mode together with m_in/n_in/loop_in.
input[1:0] mode_in;
output done;
// "cur" memory controls: 128 byte-enable bits, 7-bit addresses.
output[128-1:0] curWriteByteEn;
output[7-1:0] curWriteAddr, curReadAddr;
output curWriteEn;
// "left" memory controls: same widths as the "cur" ones.
output[128-1:0] leftWriteByteEn;
output[7-1:0] leftWriteAddr, leftReadAddr;
output leftWriteEn;
// "top" memory controls: 12-bit addresses.
output[12-1:0] topWriteAddr, topReadAddr;
output topWriteEn;
output leftWriteSel, curWriteSel, topSourceSel, diagEn;
// 5-bit select, i.e. one of 32 choices.
output[5-1:0] topWriteSel;
output MOSel;
output MOEn;
// ---- start / parameter registers ----
reg        start;
reg [15:0] startDelay;
reg [6:0]  m, n, stop, stop2, loop;
reg [1:0]  mode;

// ---- FSM state and handshake flags ----
reg [3:0]  nextState, currentState;
reg [1:0]  nextRowState, currentRowState;
reg        startFetchRow, doneFetchRow, loadRow, writeRow;
reg        updateCounter;

// ---- loop indices and address bases ----
reg [6:0]   i1, j;
reg [11:0]  nextTopIdx, nextTopIdx2, curTopIdx, nextTopIdxCounter;
reg [1:0]   topIdx, topIdxCounter, mdivk;
reg [6:0]   diagIdx, leftIdx, msIdx;
reg [4:0]   imodk, i1modk;
reg [6:0]   diagIdxCounter, leftIdxCounter, msIdxCounter, readRowCounter, topWriteCounter;
reg [127:0] byteEn, i1modkByteEn;

// ---- module outputs (declared as reg) ----
reg         done;
reg [127:0] curWriteByteEn;
reg [6:0]   curWriteAddr, curReadAddr;
reg         curWriteEn;
reg [127:0] leftWriteByteEn;
reg [6:0]   leftWriteAddr, leftReadAddr;
reg         leftWriteEn;
reg [11:0]  topWriteAddr, topReadAddr;
reg         topWriteEn;
reg         leftWriteSel, curWriteSel, topSourceSel, diagEn;
reg [4:0]   topWriteSel;
reg         MOSel;
reg         MOEn;

// ---- cycle counters ----
reg [6:0]   counter;
reg [5:0]   divCounter;

// ---- delay-line registers for control signals ----
reg [127:0] writeByteEnDelay0,  writeByteEnDelay1,  writeByteEnDelay2,  writeByteEnDelay3,
            writeByteEnDelay4,  writeByteEnDelay5,  writeByteEnDelay6,  writeByteEnDelay7,
            writeByteEnDelay8,  writeByteEnDelay9,  writeByteEnDelay10, writeByteEnDelay11,
            writeByteEnDelay12, writeByteEnDelay13, writeByteEnDelay14, writeByteEnDelay15,
            writeByteEnDelay16, writeByteEnDelay17, writeByteEnDelay18, writeByteEnDelay19,
            writeByteEnDelay20, writeByteEnDelay21, writeByteEnDelay22, writeByteEnDelay23,
            writeByteEnDelay24, writeByteEnDelay25, writeByteEnDelay26, writeByteEnDelay27,
            writeByteEnDelay28, writeByteEnDelay29, writeByteEnDelay30, writeByteEnDelay31;
reg [6:0]   curWriteAddrDelay0,  curWriteAddrDelay1,  curWriteAddrDelay2,  curWriteAddrDelay3,
            curWriteAddrDelay4,  curWriteAddrDelay5,  curWriteAddrDelay6,  curWriteAddrDelay7,
            curWriteAddrDelay8,  curWriteAddrDelay9,  curWriteAddrDelay10, curWriteAddrDelay11,
            curWriteAddrDelay12, curWriteAddrDelay13, curWriteAddrDelay14, curWriteAddrDelay15,
            curWriteAddrDelay16, curWriteAddrDelay17, curWriteAddrDelay18, curWriteAddrDelay19,
            curWriteAddrDelay20, curWriteAddrDelay21, curWriteAddrDelay22, curWriteAddrDelay23,
            curWriteAddrDelay24, curWriteAddrDelay25, curWriteAddrDelay26, curWriteAddrDelay27,
            curWriteAddrDelay28, curWriteAddrDelay29, curWriteAddrDelay30, curWriteAddrDelay31;
reg [6:0]   curReadAddrDelay0, curReadAddrDelay1, curReadAddrDelay2,  curReadAddrDelay3,
            curReadAddrDelay4, curReadAddrDelay5, curReadAddrDelay6,  curReadAddrDelay7,
            curReadAddrDelay8, curReadAddrDelay9, curReadAddrDelay10, curReadAddrDelay11;
reg [31:0]  leftWriteEnDelay;
reg [31:0]  curWriteEnDelay;
reg [4:0]   leftWriteSelDelay;
reg [15:0]  curWriteSelDelay;
reg [6:0]   leftReadAddrDelay0;
reg [11:0]  topWriteAddrDelay0,  topWriteAddrDelay1,  topWriteAddrDelay2,  topWriteAddrDelay3,
            topWriteAddrDelay4,  topWriteAddrDelay5,  topWriteAddrDelay6,  topWriteAddrDelay7,
            topWriteAddrDelay8,  topWriteAddrDelay9,  topWriteAddrDelay10, topWriteAddrDelay11,
            topWriteAddrDelay12, topWriteAddrDelay13, topWriteAddrDelay14, topWriteAddrDelay15,
            topWriteAddrDelay16, topWriteAddrDelay17, topWriteAddrDelay18, topWriteAddrDelay19,
            topWriteAddrDelay20, topWriteAddrDelay21, topWriteAddrDelay22, topWriteAddrDelay23,
            topWriteAddrDelay24, topWriteAddrDelay25, topWriteAddrDelay26, topWriteAddrDelay27,
            topWriteAddrDelay28, topWriteAddrDelay29, topWriteAddrDelay30, topWriteAddrDelay31;
reg [31:0]  topWriteEnDelay;
reg [4:0]   topSourceSelDelay;
reg [4:0]   topWriteSelDelay0,  topWriteSelDelay1,  topWriteSelDelay2,  topWriteSelDelay3,
            topWriteSelDelay4,  topWriteSelDelay5,  topWriteSelDelay6,  topWriteSelDelay7,
            topWriteSelDelay8,  topWriteSelDelay9,  topWriteSelDelay10, topWriteSelDelay11,
            topWriteSelDelay12, topWriteSelDelay13, topWriteSelDelay14, topWriteSelDelay15,
            topWriteSelDelay16, topWriteSelDelay17, topWriteSelDelay18, topWriteSelDelay19,
            topWriteSelDelay20, topWriteSelDelay21, topWriteSelDelay22, topWriteSelDelay23,
            topWriteSelDelay24, topWriteSelDelay25, topWriteSelDelay26, topWriteSelDelay27,
            topWriteSelDelay28, topWriteSelDelay29, topWriteSelDelay30, topWriteSelDelay31;
reg [5:0]   diagEnDelay;
reg [5:0]   MOEnDelay;
reg [6:0]   waitCycles;
// register store m, n and mdivk value
// Also derives stop/stop2 from loop and produces the internal start pulse,
// which is start_in delayed through the 16-bit startDelay shift register
// plus the start register itself.
always @ (posedge clk)
begin
    // Capture the run parameters while start_in is asserted.
    if (start_in == 1'b1)
    begin
        mode <= mode_in;
        loop <= loop_in;
        m    <= m_in;
        n    <= n_in;
    end

    // stop is loop when mode[0] == 0 and m == loop, otherwise loop + 1.
    stop2 <= loop;
    if (mode[0] == 1'b0 && m == loop)
        stop <= loop;
    else
        stop <= loop+1'b1;

    // Shift start_in in at bit 0; bit 15 feeds the start register.
    startDelay <= {startDelay[14:0], start_in};
    start      <= startDelay[15];

    // (m + 31) / 32, kept in the 2-bit mdivk register.
    mdivk <= (m+32-1)>>5;
end
// registers that store values that are used in FSM, dependent on i and/or j
// Every register except nextTopIdx2 and waitCycles is re-initialised by start.
always @ (posedge clk)
begin
    // topIdx: +1 at `cINCRE_I when i1modk wraps (== 31) and mode[0] == 0.
    if (start == 1'b1)
    begin
        topIdx <= 2'b00; //offset1divk;
    end
    else if (currentState == `cINCRE_I && i1modk == 32-1 && mode[0] == 1'b0)
    begin
        topIdx <= topIdx + 1'b1;
    end

    // diagIdx: set to 2 at `cSTORE_DIAG in mode 01; at `cINCRE_I it advances
    // by 2, or by 3 when the relevant modulo counter is at 31.
    if (start == 1'b1)
    begin
        diagIdx <= 7'b0000000;
    end
    else if (currentState == `cSTORE_DIAG && mode == 2'b01)
    begin
        diagIdx <= 2;
    end
    else if (currentState == `cINCRE_I)
    begin
        if ((imodk == 32-1 && mode == 2'b00) || (i1modk == 32-1 && mode == 2'b01))
            diagIdx <= diagIdx + 2 + 1;
        else
            diagIdx <= diagIdx + 2;
    end

    // leftIdx: at `cINCRE_I advances by 2, or by 3 when i1modk == 31 and mode[0] == 0.
    if (start == 1'b1)
    begin
        leftIdx <= 7'b0000000;
    end
    else if (currentState == `cINCRE_I)
    begin
        if (i1modk == 32-1 && mode[0] == 1'b0)
            leftIdx <= leftIdx + 2 + 1;
        else
            leftIdx <= leftIdx + 2;
    end

    // msIdx: reloaded at `cUPDATE_J, stepped by 2 when the row FSM is about
    // to enter `cLOAD_ROW_INC_J (uses nextRowState).
    if (start == 1'b1)
    begin
        msIdx <= 7'b0000000;
    end
    else if (currentState == `cUPDATE_J)
    begin
        if (mode[1] == 1'b0)
            msIdx <= leftIdx + 2;
        else
            msIdx <= topIdx;
    end
    else if (nextRowState == `cLOAD_ROW_INC_J)
    begin
        msIdx <= msIdx + 2;
    end

    // imodk: counts 0..31 and wraps, one step per `cINCRE_I.
    if (start == 1'b1)
    begin
        imodk <= 5'b00000;
    end
    else if (currentState == `cINCRE_I)
    begin
        if (imodk == 32-1)
            imodk <= 5'b00000;
        else
            imodk <= imodk + 1'b1;
    end

    // i1modk: same as imodk but starts at 1.
    if (start == 1'b1)
    begin
        i1modk <= 5'b00001;
    end
    else if (currentState == `cINCRE_I)
    begin
        if (i1modk == 32-1)
            i1modk <= 5'b00000;
        else
            i1modk <= i1modk + 1'b1;
    end

    // nextTopIdx: at `cINCRE_I advances by n + 1 (mode[1] == 0) or by n.
    if (start == 1'b1)
    begin
        nextTopIdx <= 12'b000000000000;
    end
    else if (currentState == `cINCRE_I)
    begin
        if (mode[1] == 0)
            nextTopIdx <= nextTopIdx + n + 1;
        else
            nextTopIdx <= nextTopIdx + n;
    end

    // nextTopIdx2 is updated on every clock edge (no start reset).
    nextTopIdx2 <= nextTopIdx + n + 1;

    // curTopIdx: reloaded at `cUPDATE_J, +1 when the row FSM is about to
    // enter `cLOAD_ROW_INC_J (uses nextRowState).
    if (start == 1'b1)
    begin
        curTopIdx <= 12'b000000000001;
    end
    else if (currentState == `cUPDATE_J)
    begin
        if (mode[1] == 1'b0)
            curTopIdx <= nextTopIdx+1;
        else
            curTopIdx <= nextTopIdx;
    end
    else if (nextRowState == `cLOAD_ROW_INC_J)
    begin
        curTopIdx <= curTopIdx + 1;
    end

    // i1: starts at 1 and increments at `cINCRE_I.
    if (start == 1'b1)
    begin
        i1 <= 7'b0000001;
    end
    else if (currentState == `cINCRE_I)
    begin
        i1 <= i1 + 1;
    end

    // j: reloaded at `cUPDATE_J, +1 while the row FSM is in
    // `cLOAD_ROW_INC_J (uses currentRowState, unlike msIdx/curTopIdx).
    if (start == 1'b1)
    begin
        j <= 7'b0000000;
    end
    else if (currentState == `cUPDATE_J)
    begin
        if (mode[1] == 1'b0)
            j <= i1;
        else
            j <= 7'b0000000;
    end
    else if (currentRowState == `cLOAD_ROW_INC_J)
    begin
        j <= j + 1;
    end

    // compute cycles of delay in FSM
    // Loaded at `cSTORE_MO, adjusted at `cINCRE_I, otherwise counts down to 0.
    if (currentState == `cSTORE_MO)
    begin
        waitCycles <= 32-1;
    end
    else if (currentState == `cINCRE_I)
    begin
        if (i1 == stop-1)
        begin
            if (mode[1] == 1'b1)
                waitCycles <= 32-1 + 6 - 3;
            else
                waitCycles <= waitCycles + 5 - 2;
        end
        else if (mode == 2'b01 && waitCycles < 32-1 - (16-1) - 4)
        begin
            waitCycles <= 32-1 - (16-1) - 4;
        end
        else if (mode == 2'b10 && i1modk == 32-1)
        begin
            waitCycles <= 32-1 + 6 - 3;
        end
        else if (mode == 2'b00)
        begin
            waitCycles <= waitCycles + 6 ;
        end
    end
    else if (waitCycles >7'b0000000)
    begin
        waitCycles <= waitCycles - 1;
    end
end
// determining next state of main FSM
// Combinational: every branch assigns both nextState and updateCounter.
// updateCounter == 1 makes the address/cycle counters reload on the next edge.
always @ (currentState or start or mode or m or n or counter or mdivk or topIdxCounter or doneFetchRow or divCounter or j or stop2 or waitCycles or stop or i1)
begin
    case (currentState)
        // Idle until the delayed start pulse arrives.
        `cSETUP:
        begin
            updateCounter = 1'b1;
            if (start == 1'b1)
                nextState = `cSTART;
            else
                nextState = `cSETUP;
        end

        // Dispatch on mode.
        `cSTART:
        begin
            updateCounter = 1'b1;
            if (mode == 2'b00)
            begin
                if (m == 1 && n == 1)
                    nextState = `cDONE;
                else
                    nextState = `cFETCH_COL;
            end
            else if (mode == 2'b01)
            begin
                nextState = `cSTORE_DIAG;
            end
            else if (mode == 2'b10)
            begin
                nextState = `cSTART_FETCH_ROW;
            end
            else
            begin
                nextState = `cUPDATE_J;
            end
        end

        // Stay until counter reaches 5+6-1.
        `cSTART_FETCH_ROW:
        begin
            updateCounter = 1'b0;
            if (counter == 5+6-1)
            begin
                if (mode == 2'b00)
                    nextState = `cSTORE_DIAG;
                else
                    nextState = `cUPDATE_J;
            end
            else
            begin
                nextState = `cSTART_FETCH_ROW;
            end
        end

        // Runs until counter >= mdivk-1; mode 00 then waits in `cWAIT_COL
        // while counter is still below 5.
        `cFETCH_COL:
        begin
            if (counter >= mdivk-1)
            begin
                if (mode == 2'b00 && counter < 5)
                begin
                    nextState = `cWAIT_COL;
                    updateCounter = 1'b0;
                end
                else
                begin
                    if (mode == 2'b00)
                        nextState = `cSTART_FETCH_ROW;
                    else
                        nextState = `cFIND_REC;
                    updateCounter = 1'b1;
                end
            end
            else
            begin
                nextState = `cFETCH_COL;
                updateCounter = 1'b0;
            end
        end

        `cWAIT_COL:
        begin
            if (counter >= 5)
            begin
                if (mode == 2'b00)
                    nextState = `cSTART_FETCH_ROW;
                else
                    nextState = `cFIND_REC;
                updateCounter = 1'b1;
            end
            else
            begin
                nextState = `cWAIT_COL;
                updateCounter = 1'b0;
            end
        end

        `cSTORE_DIAG:
        begin
            updateCounter = 1'b1;
            if (mode == 2'b00)
                nextState = `cFIND_REC;
            else
                nextState = `cFETCH_COL;
        end

        // Stay until divCounter reaches 56.
        `cFIND_REC:
        begin
            if (divCounter == 56)
            begin
                if (mode == 2'b00)
                    nextState = `cMULT_COL;
                else
                    nextState = `cSTORE_DIAG2;
                updateCounter = 1'b1;
            end
            else
            begin
                nextState = `cFIND_REC;
                updateCounter = 1'b0;
            end
        end

        `cSTORE_DIAG2:
        begin
            nextState = `cMULT_COL;
            updateCounter = 1'b1;
        end

        // Counters keep running in both branches (updateCounter stays 0).
        `cMULT_COL:
        begin
            updateCounter = 1'b0;
            if (topIdxCounter == mdivk-1)
                nextState = `cUPDATE_J;
            else
                nextState = `cMULT_COL;
        end

        // Needs doneFetchRow, and for mode[1] == 0 also counter >= 16-1.
        `cUPDATE_J:
        begin
            if ((mode[1] == 1'b1 || counter >= 16-1) && doneFetchRow == 1'b1)
            begin
                nextState = `cSTORE_MO;
                updateCounter = 1'b1;
            end
            else
            begin
                nextState = `cUPDATE_J;
                updateCounter = 1'b0;
            end
        end

        // With j == stop2 this state drains until counter == mdivk-1+5-2 and
        // then finishes; otherwise it moves on to `cMULT_SUB.
        `cSTORE_MO:
        begin
            if (j == stop2)
            begin
                if (counter == mdivk-1+5-2)
                    nextState = `cDONE;
                else
                    nextState = `cSTORE_MO;
                updateCounter = 1'b0;
            end
            else
            begin
                nextState = `cMULT_SUB;
                updateCounter = 1'b1;
            end
        end

        `cMULT_SUB:
        begin
            if (topIdxCounter == mdivk-1)
            begin
                if (j == n-1)
                    nextState = `cINCRE_I;
                else
                    nextState = `cMULT_SUB;
                updateCounter = 1'b1;
            end
            else
            begin
                nextState = `cMULT_SUB;
                updateCounter = 1'b0;
            end
        end

        `cINCRE_I:
        begin
            nextState = `cWAIT;
            updateCounter = 1'b1;
        end

        // Hold until waitCycles has counted down to 0, then finish or
        // re-enter the loop at a mode-dependent state.
        `cWAIT:
        begin
            if (waitCycles == 0)
            begin
                if (i1 == stop)
                    nextState = `cDONE;
                else if (mode == 2'b00)
                    nextState = `cSTORE_DIAG;
                else if (mode == 2'b01)
                    nextState = `cFIND_REC;
                else
                    nextState = `cUPDATE_J;
                updateCounter = 1'b1;
            end
            else
            begin
                nextState = `cWAIT;
                updateCounter = 1'b0;
            end
        end

        // Terminal state; left only through the start_in reset of currentState.
        `cDONE:
        begin
            nextState = `cDONE;
            updateCounter = 1'b0;
        end

        default:
        begin
            nextState = `cSETUP;
            updateCounter = 1'b1;
        end
    endcase
end
// Combinational handshake flags between the main FSM and the row FSM.
always @ (currentRowState or currentState or nextState or i1 or topIdxCounter or mdivk or msIdxCounter or readRowCounter or j or n or mode)
begin
    // doneFetchRow mirrors the row FSM sitting in `cDONE_FETCH_ROW.
    if (currentRowState == `cDONE_FETCH_ROW)
    begin
        doneFetchRow = 1'b1;
    end
    else
    begin
        doneFetchRow = 1'b0;
    end

    // startFetchRow: main FSM is entering `cSTART_FETCH_ROW while i1 == 1.
    if((nextState == `cSTART_FETCH_ROW && currentState != `cSTART_FETCH_ROW && i1 == 1))
    begin
        startFetchRow = 1'b1;
    end
    else
    begin
        startFetchRow = 1'b0;
    end

    // loadRow: raised in `cMULT_SUB when topIdxCounter + 2 equals mdivk.
    if (currentState == `cMULT_SUB && topIdxCounter+2 == mdivk)
    begin
        loadRow = 1'b1;
    end
    else
    begin
        loadRow = 1'b0;
    end

    // writeRow: in `cMULT_SUB, when the two row counters match, j != n and mode[0] == 0.
    writeRow = (msIdxCounter == readRowCounter)&&(currentState==`cMULT_SUB)&&(j!=n)&&(mode[0] == 0);
end
// second FSM that controls the control signals to temp_top block
// Combinational next-state logic for currentRowState.
always @ (currentRowState or nextTopIdxCounter or n or startFetchRow or loadRow or topIdx or mdivk or nextState)
begin
    case (currentRowState)
        // Keep fetching until nextTopIdxCounter reaches n-1.
        `cFETCH_ROW:
        begin
            if (nextTopIdxCounter == n-1)
                nextRowState = `cDONE_FETCH_ROW;
            else
                nextRowState = `cFETCH_ROW;
        end

        // Idle: startFetchRow has priority over the load conditions.
        `cDONE_FETCH_ROW:
        begin
            if (startFetchRow == 1'b1)
                nextRowState = `cFETCH_ROW;
            else if (loadRow == 1'b1 || (topIdx+1 == mdivk && nextState == `cMULT_SUB))
                nextRowState = `cLOAD_ROW_INC_J;
            else
                nextRowState = `cDONE_FETCH_ROW;
        end

        // Remain here only while topIdx+1 == mdivk and the main FSM heads to `cMULT_SUB.
        `cLOAD_ROW_INC_J:
        begin
            if (topIdx+1 == mdivk && nextState == `cMULT_SUB)
                nextRowState = `cLOAD_ROW_INC_J;
            else
                nextRowState = `cDONE_FETCH_ROW;
        end

        default:
        begin
            nextRowState = `cDONE_FETCH_ROW;
        end
    endcase
end
// address counters
// Clocked counters used to generate memory addresses, plus the registered
// byte-enable mask i1modkByteEn derived from i1modk.
always @ (posedge clk)
begin
    // topIdxCounter / msIdxCounter / leftIdxCounter reload from their base
    // index when updateCounter is set or the row FSM is in `cLOAD_ROW_INC_J;
    // otherwise they count up by one.
    if (updateCounter == 1 || currentRowState == `cLOAD_ROW_INC_J)
        topIdxCounter <= topIdx;
    else
        topIdxCounter <= topIdxCounter + 1;
    // diagIdxCounter reloads on updateCounter only.
    if (updateCounter == 1)
        diagIdxCounter <= diagIdx;
    else
        diagIdxCounter <= diagIdxCounter + 1;
    if (updateCounter == 1 || currentRowState == `cLOAD_ROW_INC_J)
        msIdxCounter <= msIdx;
    else
        msIdxCounter <= msIdxCounter + 1;
    if (updateCounter == 1 || currentRowState == `cLOAD_ROW_INC_J)
        leftIdxCounter <= leftIdx;
    else
        leftIdxCounter <= leftIdxCounter + 1;
    // topWriteCounter: loaded with i1 in `cFETCH_COL / `cSTORE_MO, stepped
    // while a row is being written or fetched, held otherwise.
    if (currentState == `cFETCH_COL || currentState == `cSTORE_MO)
        topWriteCounter <= i1;
    else if (writeRow == 1 || currentRowState == `cFETCH_ROW)
        topWriteCounter <= topWriteCounter + 1;
    // nextTopIdxCounter: loaded in `cSTART / `cSTORE_MO, stepped with the row.
    if (currentState == `cSTART)
        nextTopIdxCounter <= nextTopIdx;
    else if (currentState == `cSTORE_MO)
        if (mode[1] == 0)
            nextTopIdxCounter <= nextTopIdx + n + 1;
        else
            nextTopIdxCounter <= nextTopIdx + n;
    else if (writeRow == 1 || currentRowState == `cFETCH_ROW)
        nextTopIdxCounter <= nextTopIdxCounter + 1;
    // readRowCounter: loaded in `cSTART / `cSTORE_MO, advanced by 2 with the row.
    if (currentState == `cSTART)
        readRowCounter <= 0; //offsetdivk;
    else if (currentState == `cSTORE_MO)
        if (mode[1] == 0)
            readRowCounter <= leftIdx + 2;
        else
            readRowCounter <= topIdx;
    else if (writeRow == 1 || currentRowState == `cFETCH_ROW)
        readRowCounter <= readRowCounter + 2;
    // counter: cycles spent since updateCounter was last set.
    if (updateCounter == 1)
        counter <= 0;
    else
        counter <= counter + 1;
    // divCounter: restarted by the STORE_DIAG states, saturates at 56.
    if (currentState == `cSTORE_DIAG || currentState == `cSTORE_DIAG2)
        divCounter <= 0;
    else if (divCounter < 56)
        divCounter <= divCounter + 1;
    // i1modkByteEn = all-ones mask shifted right by i1modk*4 bit positions.
    // The shift amount is a self-determined expression, so its width is that
    // of its left operand. It is written with a 7-bit operand so that
    // i1modk*4 (up to 124) is not truncated; with a 5-bit operand the amount
    // wraps modulo 32 for every i1modk >= 8.
    case (i1modk)
        5'b00000: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0000000<<2'b10);
        end
        5'b00001: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0000001<<2'b10);
        end
        5'b00010: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0000010<<2'b10);
        end
        5'b00011: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0000011<<2'b10);
        end
        5'b00100: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0000100<<2'b10);
        end
        5'b00101: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0000101<<2'b10);
        end
        5'b00110: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0000110<<2'b10);
        end
        5'b00111: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0000111<<2'b10);
        end
        5'b01000: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0001000<<2'b10);
        end
        5'b01001: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0001001<<2'b10);
        end
        5'b01010: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0001010<<2'b10);
        end
        5'b01011: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0001011<<2'b10);
        end
        5'b01100: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0001100<<2'b10);
        end
        5'b01101: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0001101<<2'b10);
        end
        5'b01110: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0001110<<2'b10);
        end
        5'b01111: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0001111<<2'b10);
        end
        5'b10000: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0010000<<2'b10);
        end
        5'b10001: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0010001<<2'b10);
        end
        5'b10010: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0010010<<2'b10);
        end
        5'b10011: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0010011<<2'b10);
        end
        5'b10100: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0010100<<2'b10);
        end
        5'b10101: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0010101<<2'b10);
        end
        5'b10110: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0010110<<2'b10);
        end
        5'b10111: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0010111<<2'b10);
        end
        5'b11000: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0011000<<2'b10);
        end
        5'b11001: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0011001<<2'b10);
        end
        5'b11010: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0011010<<2'b10);
        end
        5'b11011: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0011011<<2'b10);
        end
        5'b11100: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0011100<<2'b10);
        end
        5'b11101: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0011101<<2'b10);
        end
        5'b11110: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0011110<<2'b10);
        end
        5'b11111: begin
            i1modkByteEn <= ~(128'b0) >> (7'b0011111<<2'b10);
        end
        default: begin
            i1modkByteEn <= ~(128'b0);
        end
    endcase
end
// compute Byte Enable
// byteEn takes the i1modk-derived mask when the main FSM is entering
// `cMULT_COL, is in `cSTORE_MO, or the row FSM is in `cLOAD_ROW_INC_J.
// In every other cycle all 128 byte enables are set.
always @ (posedge clk)
begin
    if ((nextState == `cMULT_COL && currentState != `cMULT_COL) || (currentState == `cSTORE_MO) || currentRowState == `cLOAD_ROW_INC_J)
    begin
        byteEn <= i1modkByteEn;
    end
    else
    begin
        byteEn <= ~(128'b0);
    end
end
// update FSM state register
// The two FSMs are reset by different signals: the row FSM by the delayed
// start, the main FSM directly by start_in.
always @ (posedge clk)
begin
    // Row FSM
    if (start == 1'b1)
    begin
        currentRowState <= `cDONE_FETCH_ROW;
    end
    else
    begin
        currentRowState <= nextRowState;
    end

    // Main FSM
    if (start_in == 1'b1)
    begin
        currentState <= `cSETUP;
    end
    else
    begin
        currentState <= nextState;
    end
end
// delay register for control signals
// control signals are delayed to match latency of operations and/or memory access
always @ (posedge clk)
begin
// Every chain below is a shift register written with nonblocking assignments:
// stage k takes the previous value of stage k+1 on each clock, so a value
// injected at stage N reaches stage 0 after N further clocks. Some chains
// have extra injection points ("taps") part-way down, selected by FSM state.
//
// ---- curReadAddr chain: 12 stages, msIdxCounter enters at stage 11 ----
curReadAddrDelay0 <= curReadAddrDelay1;
curReadAddrDelay1 <= curReadAddrDelay2;
curReadAddrDelay2 <= curReadAddrDelay3;
curReadAddrDelay3 <= curReadAddrDelay4;
curReadAddrDelay4 <= curReadAddrDelay5;
curReadAddrDelay5 <= curReadAddrDelay6;
curReadAddrDelay6 <= curReadAddrDelay7;
curReadAddrDelay7 <= curReadAddrDelay8;
curReadAddrDelay8 <= curReadAddrDelay9;
curReadAddrDelay9 <= curReadAddrDelay10;
curReadAddrDelay10 <= curReadAddrDelay11;
curReadAddrDelay11 <= msIdxCounter;
// ---- curWriteAddr chain: 32 stages, msIdxCounter enters at stage 31 ----
// Taps: stage 4 loads diagIdxCounter while in `cFETCH_COL,
//       stage 15 loads leftIdxCounter while in `cMULT_COL.
curWriteAddrDelay0 <= curWriteAddrDelay1;
curWriteAddrDelay1 <= curWriteAddrDelay2;
curWriteAddrDelay2 <= curWriteAddrDelay3;
curWriteAddrDelay3 <= curWriteAddrDelay4;
if (currentState == `cFETCH_COL)
curWriteAddrDelay4 <= diagIdxCounter;
else
curWriteAddrDelay4 <= curWriteAddrDelay5;
curWriteAddrDelay5 <= curWriteAddrDelay6;
curWriteAddrDelay6 <= curWriteAddrDelay7;
curWriteAddrDelay7 <= curWriteAddrDelay8;
curWriteAddrDelay8 <= curWriteAddrDelay9;
curWriteAddrDelay9 <= curWriteAddrDelay10;
curWriteAddrDelay10 <= curWriteAddrDelay11;
curWriteAddrDelay11 <= curWriteAddrDelay12;
curWriteAddrDelay12 <= curWriteAddrDelay13;
curWriteAddrDelay13 <= curWriteAddrDelay14;
curWriteAddrDelay14 <= curWriteAddrDelay15;
if (currentState == `cMULT_COL)
curWriteAddrDelay15 <= leftIdxCounter;
else
curWriteAddrDelay15 <= curWriteAddrDelay16;
curWriteAddrDelay16 <= curWriteAddrDelay17;
curWriteAddrDelay17 <= curWriteAddrDelay18;
curWriteAddrDelay18 <= curWriteAddrDelay19;
curWriteAddrDelay19 <= curWriteAddrDelay20;
curWriteAddrDelay20 <= curWriteAddrDelay21;
curWriteAddrDelay21 <= curWriteAddrDelay22;
curWriteAddrDelay22 <= curWriteAddrDelay23;
curWriteAddrDelay23 <= curWriteAddrDelay24;
curWriteAddrDelay24 <= curWriteAddrDelay25;
curWriteAddrDelay25 <= curWriteAddrDelay26;
curWriteAddrDelay26 <= curWriteAddrDelay27;
curWriteAddrDelay27 <= curWriteAddrDelay28;
curWriteAddrDelay28 <= curWriteAddrDelay29;
curWriteAddrDelay29 <= curWriteAddrDelay30;
curWriteAddrDelay30 <= curWriteAddrDelay31;
curWriteAddrDelay31 <= msIdxCounter;
// ---- writeByteEn chain: 32 stages, byteEn enters at stage 31 ----
// Taps: stage 4 is forced to all ones when mode[0] is set, otherwise loads
//       byteEn while in `cFETCH_COL; stage 15 loads byteEn while in `cMULT_COL.
writeByteEnDelay0 <= writeByteEnDelay1;
writeByteEnDelay1 <= writeByteEnDelay2;
writeByteEnDelay2 <= writeByteEnDelay3;
writeByteEnDelay3 <= writeByteEnDelay4;
if (mode[0] == 1'b1)
writeByteEnDelay4 <= ~0;
else if (currentState == `cFETCH_COL)
writeByteEnDelay4 <= byteEn;
else
writeByteEnDelay4 <= writeByteEnDelay5;
writeByteEnDelay5 <= writeByteEnDelay6;
writeByteEnDelay6 <= writeByteEnDelay7;
writeByteEnDelay7 <= writeByteEnDelay8;
writeByteEnDelay8 <= writeByteEnDelay9;
writeByteEnDelay9 <= writeByteEnDelay10;
writeByteEnDelay10 <= writeByteEnDelay11;
writeByteEnDelay11 <= writeByteEnDelay12;
writeByteEnDelay12 <= writeByteEnDelay13;
writeByteEnDelay13 <= writeByteEnDelay14;
writeByteEnDelay14 <= writeByteEnDelay15;
if (currentState == `cMULT_COL)
writeByteEnDelay15 <= byteEn;
else
writeByteEnDelay15 <= writeByteEnDelay16;
writeByteEnDelay16 <= writeByteEnDelay17;
writeByteEnDelay17 <= writeByteEnDelay18;
writeByteEnDelay18 <= writeByteEnDelay19;
writeByteEnDelay19 <= writeByteEnDelay20;
writeByteEnDelay20 <= writeByteEnDelay21;
writeByteEnDelay21 <= writeByteEnDelay22;
writeByteEnDelay22 <= writeByteEnDelay23;
writeByteEnDelay23 <= writeByteEnDelay24;
writeByteEnDelay24 <= writeByteEnDelay25;
writeByteEnDelay25 <= writeByteEnDelay26;
writeByteEnDelay26 <= writeByteEnDelay27;
writeByteEnDelay27 <= writeByteEnDelay28;
writeByteEnDelay28 <= writeByteEnDelay29;
writeByteEnDelay29 <= writeByteEnDelay30;
writeByteEnDelay30 <= writeByteEnDelay31;
writeByteEnDelay31 <= byteEn;
// ---- curWriteSel chain: 16 stages ----
// Stage 15 is 0 while in `cMULT_COL and 1 in every other state.
curWriteSelDelay[0] <= curWriteSelDelay[1];
curWriteSelDelay[1] <= curWriteSelDelay[2];
curWriteSelDelay[2] <= curWriteSelDelay[3];
curWriteSelDelay[3] <= curWriteSelDelay[4];
curWriteSelDelay[4] <= curWriteSelDelay[5];
curWriteSelDelay[5] <= curWriteSelDelay[6];
curWriteSelDelay[6] <= curWriteSelDelay[7];
curWriteSelDelay[7] <= curWriteSelDelay[8];
curWriteSelDelay[8] <= curWriteSelDelay[9];
curWriteSelDelay[9] <= curWriteSelDelay[10];
curWriteSelDelay[10] <= curWriteSelDelay[11];
curWriteSelDelay[11] <= curWriteSelDelay[12];
curWriteSelDelay[12] <= curWriteSelDelay[13];
curWriteSelDelay[13] <= curWriteSelDelay[14];
curWriteSelDelay[14] <= curWriteSelDelay[15];
if (currentState == `cMULT_COL)
curWriteSelDelay[15] <= 1'b0;
else
curWriteSelDelay[15] <= 1'b1;
// ---- curWriteEn chain: 32 stages ----
// Stage 31 is 1 only while in `cMULT_SUB; stage 15 is additionally forced
// to 1 while in `cMULT_COL.
curWriteEnDelay[0] <= curWriteEnDelay[1];
curWriteEnDelay[1] <= curWriteEnDelay[2];
curWriteEnDelay[2] <= curWriteEnDelay[3];
curWriteEnDelay[3] <= curWriteEnDelay[4];
curWriteEnDelay[4] <= curWriteEnDelay[5];
curWriteEnDelay[5] <= curWriteEnDelay[6];
curWriteEnDelay[6] <= curWriteEnDelay[7];
curWriteEnDelay[7] <= curWriteEnDelay[8];
curWriteEnDelay[8] <= curWriteEnDelay[9];
curWriteEnDelay[9] <= curWriteEnDelay[10];
curWriteEnDelay[10] <= curWriteEnDelay[11];
curWriteEnDelay[11] <= curWriteEnDelay[12];
curWriteEnDelay[12] <= curWriteEnDelay[13];
curWriteEnDelay[13] <= curWriteEnDelay[14];
curWriteEnDelay[14] <= curWriteEnDelay[15];
if (currentState == `cMULT_COL)
curWriteEnDelay[15] <= 1'b1;
else
curWriteEnDelay[15] <= curWriteEnDelay[16];
curWriteEnDelay[16] <= curWriteEnDelay[17];
curWriteEnDelay[17] <= curWriteEnDelay[18];
curWriteEnDelay[18] <= curWriteEnDelay[19];
curWriteEnDelay[19] <= curWriteEnDelay[20];
curWriteEnDelay[20] <= curWriteEnDelay[21];
curWriteEnDelay[21] <= curWriteEnDelay[22];
curWriteEnDelay[22] <= curWriteEnDelay[23];
curWriteEnDelay[23] <= curWriteEnDelay[24];
curWriteEnDelay[24] <= curWriteEnDelay[25];
curWriteEnDelay[25] <= curWriteEnDelay[26];
curWriteEnDelay[26] <= curWriteEnDelay[27];
curWriteEnDelay[27] <= curWriteEnDelay[28];
curWriteEnDelay[28] <= curWriteEnDelay[29];
curWriteEnDelay[29] <= curWriteEnDelay[30];
curWriteEnDelay[30] <= curWriteEnDelay[31];
if (currentState == `cMULT_SUB)
curWriteEnDelay[31] <= 1'b1;
else
curWriteEnDelay[31] <= 1'b0;
// ---- leftWriteSel chain: 5 stages ----
// Stage 4 is 0 while in `cFETCH_COL and 1 in every other state.
leftWriteSelDelay[0] <= leftWriteSelDelay[1];
leftWriteSelDelay[1] <= leftWriteSelDelay[2];
leftWriteSelDelay[2] <= leftWriteSelDelay[3];
leftWriteSelDelay[3] <= leftWriteSelDelay[4];
if (currentState == `cFETCH_COL)
leftWriteSelDelay[4] <= 1'b0;
else
leftWriteSelDelay[4] <= 1'b1;
// ---- leftWriteEn chain: 32 stages ----
// Stage 31 is 1 while in `cMULT_SUB with mode == 0, or mode == 1 and j == i1.
// Stage 15 is forced to 1 while in `cMULT_COL, stage 4 while in `cFETCH_COL.
leftWriteEnDelay[0] <= leftWriteEnDelay[1];
leftWriteEnDelay[1] <= leftWriteEnDelay[2];
leftWriteEnDelay[2] <= leftWriteEnDelay[3];
leftWriteEnDelay[3] <= leftWriteEnDelay[4];
if (currentState == `cFETCH_COL)
leftWriteEnDelay[4] <= 1'b1;
else
leftWriteEnDelay[4] <= leftWriteEnDelay[5];
leftWriteEnDelay[5] <= leftWriteEnDelay[6];
leftWriteEnDelay[6] <= leftWriteEnDelay[7];
leftWriteEnDelay[7] <= leftWriteEnDelay[8];
leftWriteEnDelay[8] <= leftWriteEnDelay[9];
leftWriteEnDelay[9] <= leftWriteEnDelay[10];
leftWriteEnDelay[10] <= leftWriteEnDelay[11];
leftWriteEnDelay[11] <= leftWriteEnDelay[12];
leftWriteEnDelay[12] <= leftWriteEnDelay[13];
leftWriteEnDelay[13] <= leftWriteEnDelay[14];
leftWriteEnDelay[14] <= leftWriteEnDelay[15];
if (currentState == `cMULT_COL)
leftWriteEnDelay[15] <= 1'b1;
else
leftWriteEnDelay[15] <= leftWriteEnDelay[16];
leftWriteEnDelay[16] <= leftWriteEnDelay[17];
leftWriteEnDelay[17] <= leftWriteEnDelay[18];
leftWriteEnDelay[18] <= leftWriteEnDelay[19];
leftWriteEnDelay[19] <= leftWriteEnDelay[20];
leftWriteEnDelay[20] <= leftWriteEnDelay[21];
leftWriteEnDelay[21] <= leftWriteEnDelay[22];
leftWriteEnDelay[22] <= leftWriteEnDelay[23];
leftWriteEnDelay[23] <= leftWriteEnDelay[24];
leftWriteEnDelay[24] <= leftWriteEnDelay[25];
leftWriteEnDelay[25] <= leftWriteEnDelay[26];
leftWriteEnDelay[26] <= leftWriteEnDelay[27];
leftWriteEnDelay[27] <= leftWriteEnDelay[28];
leftWriteEnDelay[28] <= leftWriteEnDelay[29];
leftWriteEnDelay[29] <= leftWriteEnDelay[30];
leftWriteEnDelay[30] <= leftWriteEnDelay[31];
if (currentState == `cMULT_SUB && (mode == 0 || (mode == 1 && j == i1)))
leftWriteEnDelay[31] <= 1'b1;
else
leftWriteEnDelay[31] <= 1'b0;
// ---- topWriteAddr chain: 32 stages, nextTopIdxCounter enters at stage 31 ----
// Tap: stage 4 loads nextTopIdxCounter while the row FSM is in `cFETCH_ROW.
topWriteAddrDelay0 <= topWriteAddrDelay1;
topWriteAddrDelay1 <= topWriteAddrDelay2;
topWriteAddrDelay2 <= topWriteAddrDelay3;
topWriteAddrDelay3 <= topWriteAddrDelay4;
if (currentRowState == `cFETCH_ROW)
topWriteAddrDelay4 <= nextTopIdxCounter;
else
topWriteAddrDelay4 <= topWriteAddrDelay5;
topWriteAddrDelay5 <= topWriteAddrDelay6;
topWriteAddrDelay6 <= topWriteAddrDelay7;
topWriteAddrDelay7 <= topWriteAddrDelay8;
topWriteAddrDelay8 <= topWriteAddrDelay9;
topWriteAddrDelay9 <= topWriteAddrDelay10;
topWriteAddrDelay10 <= topWriteAddrDelay11;
topWriteAddrDelay11 <= topWriteAddrDelay12;
topWriteAddrDelay12 <= topWriteAddrDelay13;
topWriteAddrDelay13 <= topWriteAddrDelay14;
topWriteAddrDelay14 <= topWriteAddrDelay15;
topWriteAddrDelay15 <= topWriteAddrDelay16;
topWriteAddrDelay16 <= topWriteAddrDelay17;
topWriteAddrDelay17 <= topWriteAddrDelay18;
topWriteAddrDelay18 <= topWriteAddrDelay19;
topWriteAddrDelay19 <= topWriteAddrDelay20;
topWriteAddrDelay20 <= topWriteAddrDelay21;
topWriteAddrDelay21 <= topWriteAddrDelay22;
topWriteAddrDelay22 <= topWriteAddrDelay23;
topWriteAddrDelay23 <= topWriteAddrDelay24;
topWriteAddrDelay24 <= topWriteAddrDelay25;
topWriteAddrDelay25 <= topWriteAddrDelay26;
topWriteAddrDelay26 <= topWriteAddrDelay27;
topWriteAddrDelay27 <= topWriteAddrDelay28;
topWriteAddrDelay28 <= topWriteAddrDelay29;
topWriteAddrDelay29 <= topWriteAddrDelay30;
topWriteAddrDelay30 <= topWriteAddrDelay31;
topWriteAddrDelay31 <= nextTopIdxCounter;
// ---- topWriteEn chain: 32 stages, writeRow enters at stage 31 ----
// Tap: stage 4 is forced to 1 while the row FSM is in `cFETCH_ROW.
topWriteEnDelay[0] <= topWriteEnDelay[1];
topWriteEnDelay[1] <= topWriteEnDelay[2];
topWriteEnDelay[2] <= topWriteEnDelay[3];
topWriteEnDelay[3] <= topWriteEnDelay[4];
if (currentRowState == `cFETCH_ROW)
topWriteEnDelay[4] <= 1'b1;
else
topWriteEnDelay[4] <= topWriteEnDelay[5];
topWriteEnDelay[5] <= topWriteEnDelay[6];
topWriteEnDelay[6] <= topWriteEnDelay[7];
topWriteEnDelay[7] <= topWriteEnDelay[8];
topWriteEnDelay[8] <= topWriteEnDelay[9];
topWriteEnDelay[9] <= topWriteEnDelay[10];
topWriteEnDelay[10] <= topWriteEnDelay[11];
topWriteEnDelay[11] <= topWriteEnDelay[12];
topWriteEnDelay[12] <= topWriteEnDelay[13];
topWriteEnDelay[13] <= topWriteEnDelay[14];
topWriteEnDelay[14] <= topWriteEnDelay[15];
topWriteEnDelay[15] <= topWriteEnDelay[16];
topWriteEnDelay[16] <= topWriteEnDelay[17];
topWriteEnDelay[17] <= topWriteEnDelay[18];
topWriteEnDelay[18] <= topWriteEnDelay[19];
topWriteEnDelay[19] <= topWriteEnDelay[20];
topWriteEnDelay[20] <= topWriteEnDelay[21];
topWriteEnDelay[21] <= topWriteEnDelay[22];
topWriteEnDelay[22] <= topWriteEnDelay[23];
topWriteEnDelay[23] <= topWriteEnDelay[24];
topWriteEnDelay[24] <= topWriteEnDelay[25];
topWriteEnDelay[25] <= topWriteEnDelay[26];
topWriteEnDelay[26] <= topWriteEnDelay[27];
topWriteEnDelay[27] <= topWriteEnDelay[28];
topWriteEnDelay[28] <= topWriteEnDelay[29];
topWriteEnDelay[29] <= topWriteEnDelay[30];
topWriteEnDelay[30] <= topWriteEnDelay[31];
topWriteEnDelay[31] <= writeRow;
// ---- topWriteSel chain: 32 stages, i1modk enters at stage 31 ----
// Tap: stage 4 loads imodk. Because && binds tighter than ||, the condition
// reads: (row FSM in `cFETCH_ROW) or (main FSM in `cUPDATE_J and i1 == 1).
topWriteSelDelay0 <= topWriteSelDelay1;
topWriteSelDelay1 <= topWriteSelDelay2;
topWriteSelDelay2 <= topWriteSelDelay3;
topWriteSelDelay3 <= topWriteSelDelay4;
if (currentRowState == `cFETCH_ROW || currentState == `cUPDATE_J && i1 == 1)
topWriteSelDelay4 <= imodk;
else
topWriteSelDelay4 <= topWriteSelDelay5;
topWriteSelDelay5 <= topWriteSelDelay6;
topWriteSelDelay6 <= topWriteSelDelay7;
topWriteSelDelay7 <= topWriteSelDelay8;
topWriteSelDelay8 <= topWriteSelDelay9;
topWriteSelDelay9 <= topWriteSelDelay10;
topWriteSelDelay10 <= topWriteSelDelay11;
topWriteSelDelay11 <= topWriteSelDelay12;
topWriteSelDelay12 <= topWriteSelDelay13;
topWriteSelDelay13 <= topWriteSelDelay14;
topWriteSelDelay14 <= topWriteSelDelay15;
topWriteSelDelay15 <= topWriteSelDelay16;
topWriteSelDelay16 <= topWriteSelDelay17;
topWriteSelDelay17 <= topWriteSelDelay18;
topWriteSelDelay18 <= topWriteSelDelay19;
topWriteSelDelay19 <= topWriteSelDelay20;
topWriteSelDelay20 <= topWriteSelDelay21;
topWriteSelDelay21 <= topWriteSelDelay22;
topWriteSelDelay22 <= topWriteSelDelay23;
topWriteSelDelay23 <= topWriteSelDelay24;
topWriteSelDelay24 <= topWriteSelDelay25;
topWriteSelDelay25 <= topWriteSelDelay26;
topWriteSelDelay26 <= topWriteSelDelay27;
topWriteSelDelay27 <= topWriteSelDelay28;
topWriteSelDelay28 <= topWriteSelDelay29;
topWriteSelDelay29 <= topWriteSelDelay30;
topWriteSelDelay30 <= topWriteSelDelay31;
topWriteSelDelay31 <= i1modk;
// ---- topSourceSel chain: 5 stages ----
// Stage 4 is sticky: cleared by start, set while in `cSTORE_MO, and holds
// its previous value otherwise (there is no final else).
topSourceSelDelay[0] <= topSourceSelDelay[1];
topSourceSelDelay[1] <= topSourceSelDelay[2];
topSourceSelDelay[2] <= topSourceSelDelay[3];
topSourceSelDelay[3] <= topSourceSelDelay[4];
if (start == 1'b1)
topSourceSelDelay[4] <= 1'b0;
else if (currentState == `cSTORE_MO)
topSourceSelDelay[4] <= 1'b1;
// ---- leftReadAddr: single register stage fed by leftIdxCounter ----
leftReadAddrDelay0 <= leftIdxCounter;
// ---- diagEn chain: 6 stages ----
// Stage 5 is 1 while in `cSTORE_DIAG or `cSTORE_DIAG2.
diagEnDelay[0] <= diagEnDelay[1];
diagEnDelay[1] <= diagEnDelay[2];
diagEnDelay[2] <= diagEnDelay[3];
diagEnDelay[3] <= diagEnDelay[4];
diagEnDelay[4] <= diagEnDelay[5];
diagEnDelay[5] <= (currentState == `cSTORE_DIAG || currentState == `cSTORE_DIAG2);
// ---- MOEn chain: 6 stages ----
// Stage 5 is 1 while the main FSM is in `cSTORE_MO or the row FSM is in
// `cLOAD_ROW_INC_J, and 0 otherwise.
MOEnDelay[0] <= MOEnDelay[1];
MOEnDelay[1] <= MOEnDelay[2];
MOEnDelay[2] <= MOEnDelay[3];
MOEnDelay[3] <= MOEnDelay[4];
MOEnDelay[4] <= MOEnDelay[5];
if (currentState == `cSTORE_MO || currentRowState == `cLOAD_ROW_INC_J)
MOEnDelay[5] <= 1'b1;
else
MOEnDelay[5] <= 1'b0;
end
// output control signals
always @ (posedge clk)
begin
    // ---- current-block memory ----
    // Read address priority: column fetch, then row fetch, then the delayed
    // address from stage 0 of the read-address chain.
    if (currentState == `cFETCH_COL)
    begin
        curReadAddr <= diagIdxCounter;
    end
    else if (currentRowState == `cFETCH_ROW)
    begin
        curReadAddr <= readRowCounter;
    end
    else
    begin
        curReadAddr <= curReadAddrDelay0;
    end
    curWriteAddr   <= curWriteAddrDelay0;
    curWriteByteEn <= writeByteEnDelay0;
    curWriteSel    <= curWriteSelDelay;
    curWriteEn     <= curWriteEnDelay;

    // ---- left-block memory ----
    // The write address and byte enables are shared with the current-block
    // memory (same stage-0 delay registers).
    if (currentState == `cMULT_COL)
    begin
        leftReadAddr <= leftIdxCounter;
    end
    else
    begin
        leftReadAddr <= leftReadAddrDelay0;
    end
    leftWriteAddr   <= curWriteAddrDelay0;
    leftWriteByteEn <= writeByteEnDelay0;
    leftWriteSel    <= leftWriteSelDelay;
    leftWriteEn     <= leftWriteEnDelay;

    // ---- top-block memory ----
    if (currentState == `cSTORE_DIAG)
    begin
        topReadAddr <= nextTopIdx;
    end
    else if (currentState == `cSTORE_DIAG2)
    begin
        topReadAddr <= nextTopIdx2;
    end
    else
    begin
        topReadAddr <= curTopIdx;
    end
    topWriteAddr <= topWriteAddrDelay0;
    topWriteEn   <= topWriteEnDelay;
    topWriteSel  <= topWriteSelDelay0;
    topSourceSel <= topSourceSelDelay;

    // ---- MO / diagonal enables ----
    // MOSel is the complement of "main FSM is in `cFIND_REC"; in that same
    // state MOEn is forced high instead of following its delay chain.
    MOSel <= ~(currentState == `cFIND_REC);
    if (currentState == `cFIND_REC)
    begin
        MOEn <= 1'b1;
    end
    else
    begin
        MOEn <= MOEnDelay;
    end
    diagEn <= diagEnDelay;

    // ---- completion flag ----
    if (currentState == `cDONE)
    begin
        done <= 1'b1;
    end
    else
    begin
        done <= 1'b0;
    end
end
endmodule
// ram: wrapper that maps a separate-read/write-address memory onto the
// dual_port_ram primitive.
//   byteena_a : byte enables. They are only OR-reduced into the unused net j,
//               so they have NO effect on writes in this wrapper.
//   clk       : clock passed to the primitive.
//   data      : write data, driven into port 1.
//   rdaddress : read address, driven into port 2 (we2 is tied low).
//   wraddress : write address, driven into port 1.
//   wren      : write enable for port 1.
//   q         : read data, taken from port 2 (out2).
// The port-1 output (value_out) is ANDed with zero before being ORed into q,
// so it never changes q; presumably this only keeps out1 connected.
// All nets are declared before their first use: the original referenced
// `dummy` ahead of its declaration, which strict Verilog compilers reject.
module ram (
    byteena_a,
    clk,
    data,
    rdaddress,
    wraddress,
    wren,
    q
);
input  [`RAMNUMBYTES-1:0]   byteena_a;
input                       clk;
input  [`RAMWIDTH-1:0]      data;
input  [`rRAMSIZEWIDTH-1:0] rdaddress;
input  [`rRAMSIZEWIDTH-1:0] wraddress;
input                       wren;
output [`RAMWIDTH-1:0]      q;

wire [`RAMWIDTH-1:0] value_out;   // port-1 read-back, masked to zero below
wire [`RAMWIDTH-1:0] subwire;     // port-2 read data
wire [`RAMWIDTH-1:0] uselessdata; // constant-zero write data for port 2
wire [`RAMWIDTH-1:0] dummy;       // value_out masked to all zeros
wire j;                           // OR of the byte enables, otherwise unused

assign uselessdata = 1024'b0;
assign j = |byteena_a;
assign dummy = value_out & 1024'b0;
assign q = subwire | dummy;

dual_port_ram inst1(
    .clk (clk),
    .we1(wren),
    .we2(1'b0),
    .data1(data),
    .data2(uselessdata),
    .out1(value_out),
    .out2(subwire),
    .addr1(wraddress),
    .addr2(rdaddress));
endmodule
// ram1: wrapper that maps a separate-read/write-address memory onto the
// dual_port_ram primitive.
//   byteena_a : byte enables. They are only OR-reduced into the unused net j,
//               so they have NO effect on writes in this wrapper.
//   clk       : clock passed to the primitive.
//   data      : write data, driven into port 1.
//   rdaddress : read address, driven into port 2 (we2 is tied low).
//   wraddress : write address, driven into port 1.
//   wren      : write enable for port 1.
//   q         : read data, taken from port 2 (out2).
// The port-1 output (value_out) is ANDed with zero before being ORed into q,
// so it never changes q; presumably this only keeps out1 connected.
// All nets are declared before their first use: the original referenced
// `dummy` ahead of its declaration, which strict Verilog compilers reject.
module ram1 (
    byteena_a,
    clk,
    data,
    rdaddress,
    wraddress,
    wren,
    q
);
input  [`RAMNUMBYTES-1:0]   byteena_a;
input                       clk;
input  [`RAMWIDTH-1:0]      data;
input  [`rRAMSIZEWIDTH-1:0] rdaddress;
input  [`rRAMSIZEWIDTH-1:0] wraddress;
input                       wren;
output [`RAMWIDTH-1:0]      q;

wire [`RAMWIDTH-1:0] value_out;   // port-1 read-back, masked to zero below
wire [`RAMWIDTH-1:0] subwire;     // port-2 read data
wire [`RAMWIDTH-1:0] uselessdata; // constant-zero write data for port 2
wire [`RAMWIDTH-1:0] dummy;       // value_out masked to all zeros
wire j;                           // OR of the byte enables, otherwise unused

assign uselessdata = 1024'b0;
assign j = |byteena_a;
assign dummy = value_out & 1024'b0;
assign q = subwire | dummy;

dual_port_ram inst1(
    .clk (clk),
    .we1(wren),
    .we2(1'b0),
    .data1(data),
    .data2(uselessdata),
    .out1(value_out),
    .out2(subwire),
    .addr1(wraddress),
    .addr2(rdaddress));
endmodule
// ram2: wrapper that maps a separate-read/write-address memory onto the
// dual_port_ram primitive.
//   byteena_a : byte enables. They are only OR-reduced into the unused net j,
//               so they have NO effect on writes in this wrapper.
//   clk       : clock passed to the primitive.
//   data      : write data, driven into port 1.
//   rdaddress : read address, driven into port 2 (we2 is tied low).
//   wraddress : write address, driven into port 1.
//   wren      : write enable for port 1.
//   q         : read data, taken from port 2 (out2).
// The port-1 output (value_out) is ANDed with zero before being ORed into q,
// so it never changes q; presumably this only keeps out1 connected.
// All nets are declared before their first use: the original referenced
// `dummy` ahead of its declaration, which strict Verilog compilers reject.
module ram2 (
    byteena_a,
    clk,
    data,
    rdaddress,
    wraddress,
    wren,
    q
);
input  [`RAMNUMBYTES-1:0]   byteena_a;
input                       clk;
input  [`RAMWIDTH-1:0]      data;
input  [`rRAMSIZEWIDTH-1:0] rdaddress;
input  [`rRAMSIZEWIDTH-1:0] wraddress;
input                       wren;
output [`RAMWIDTH-1:0]      q;

wire [`RAMWIDTH-1:0] value_out;   // port-1 read-back, masked to zero below
wire [`RAMWIDTH-1:0] subwire;     // port-2 read data
wire [`RAMWIDTH-1:0] uselessdata; // constant-zero write data for port 2
wire [`RAMWIDTH-1:0] dummy;       // value_out masked to all zeros
wire j;                           // OR of the byte enables, otherwise unused

assign uselessdata = 1024'b0;
assign j = |byteena_a;
assign dummy = value_out & 1024'b0;
assign q = subwire | dummy;

dual_port_ram inst1(
    .clk (clk),
    .we1(wren),
    .we2(1'b0),
    .data1(data),
    .data2(uselessdata),
    .out1(value_out),
    .out2(subwire),
    .addr1(wraddress),
    .addr2(rdaddress));
endmodule
// ram3: wrapper that maps a separate-read/write-address memory onto the
// dual_port_ram primitive.
//   byteena_a : byte enables. They are only OR-reduced into the unused net j,
//               so they have NO effect on writes in this wrapper.
//   clk       : clock passed to the primitive.
//   data      : write data, driven into port 1.
//   rdaddress : read address, driven into port 2 (we2 is tied low).
//   wraddress : write address, driven into port 1.
//   wren      : write enable for port 1.
//   q         : read data, taken from port 2 (out2).
// The port-1 output (value_out) is ANDed with zero before being ORed into q,
// so it never changes q; presumably this only keeps out1 connected.
// All nets are declared before their first use: the original referenced
// `dummy` ahead of its declaration, which strict Verilog compilers reject.
module ram3 (
    byteena_a,
    clk,
    data,
    rdaddress,
    wraddress,
    wren,
    q
);
input  [`RAMNUMBYTES-1:0]   byteena_a;
input                       clk;
input  [`RAMWIDTH-1:0]      data;
input  [`rRAMSIZEWIDTH-1:0] rdaddress;
input  [`rRAMSIZEWIDTH-1:0] wraddress;
input                       wren;
output [`RAMWIDTH-1:0]      q;

wire [`RAMWIDTH-1:0] value_out;   // port-1 read-back, masked to zero below
wire [`RAMWIDTH-1:0] subwire;     // port-2 read data
wire [`RAMWIDTH-1:0] uselessdata; // constant-zero write data for port 2
wire [`RAMWIDTH-1:0] dummy;       // value_out masked to all zeros
wire j;                           // OR of the byte enables, otherwise unused

assign uselessdata = 1024'b0;
assign j = |byteena_a;
assign dummy = value_out & 1024'b0;
assign q = subwire | dummy;

dual_port_ram inst1(
    .clk (clk),
    .we1(wren),
    .we2(1'b0),
    .data1(data),
    .data2(uselessdata),
    .out1(value_out),
    .out2(subwire),
    .addr1(wraddress),
    .addr2(rdaddress));
endmodule
// top_ram: 32-bit-wide wrapper around the dual_port_ram_4096x32 primitive
// with 12-bit read and write addresses.
//   clk       : clock passed to the primitive.
//   data      : write data; driven into both data1 and data2, but only port 1
//               can write because we2 is tied low.
//   rdaddress : read address, driven into port 2.
//   wraddress : write address, driven into port 1.
//   wren      : write enable for port 1.
//   q         : read data, taken from port 2 (out2).
// The port-1 output (junk_output) is ANDed with zero before being ORed into q,
// so it never changes q; presumably this only keeps out1 connected.
// All nets are declared before their first use: the original referenced
// `dummy` ahead of its declaration, which strict Verilog compilers reject.
module top_ram (
    clk,
    data,
    rdaddress,
    wraddress,
    wren,
    q
);
//parameter TOPSIZE = 4096, TOPSIZEWIDTH = 12, TOPWIDTH = 32;
input           clk;
input  [32-1:0] data;
input  [12-1:0] rdaddress;
input  [12-1:0] wraddress;
input           wren;
output [32-1:0] q;

wire [32-1:0] q;
wire [32-1:0] sub_wire0;   // port-2 read data
wire [32-1:0] junk_output; // port-1 read-back, masked to zero below
wire [32-1:0] dummy;       // junk_output masked to all zeros

assign dummy = junk_output & 32'b0;
assign q = sub_wire0 | dummy;

dual_port_ram_4096x32 inst2(
    .clk (clk),
    .we1(wren),
    .we2(1'b0),
    .data1(data),
    .data2(data),
    .out1(junk_output),
    .out2(sub_wire0),
    .addr1(wraddress),
    .addr2(rdaddress));
endmodule
// mult_add: multiply A by B (fpmul) and add C to the product shifted right by
// two bit positions (fpu_add). Both results are registered once more before
// leaving the module.
//   clk         : clock for both arithmetic cores and the output registers.
//   A, B        : 32-bit multiplier operands.
//   C           : 32-bit addend.
//   mult_result : registered copy of the multiplier output.
//   add_result  : registered copy of the adder output.
module mult_add (clk, A, B, C, mult_result, add_result);
//parameter PRECISION = 32;
input         clk;
input  [31:0] A, B, C;
output [31:0] mult_result, add_result;

reg  [31:0] mult_result;
reg  [31:0] add_result;
reg  [31:0] add_a, add_b;      // declared by the original design, never driven

wire [31:0] mult_comp_result;  // raw multiplier output
wire [31:0] addition_result;   // raw adder output
wire [31:0] dummy_wire;        // multiplier output shifted right by 2
wire [4:0]  dummy_wire_2;      // multiplier flags, not used further

assign dummy_wire = mult_comp_result >> 2'b10;

//divsp MUL(.clk(clk), .rmode(2'b00), .fpu_op(3'b010), .opa(A), .opb(B), .ans(mult_comp_result) );
fpmul MUL(
    .clk(clk),
    .a(A),
    .b(B),
    .control(2'b00),
    .y_out(mult_comp_result),
    .flags(dummy_wire_2));

fpu_add ADD(
    .clock(clk),
    .a1(C),
    .b1(dummy_wire),
    .sum(addition_result));

// Output registers.
always @ (posedge clk)
begin
    mult_result <= mult_comp_result;
    add_result  <= addition_result;
end
endmodule
// ---------------------------------------------------------------------------
// Configuration macros for the data transfer unit and its FIFOs.
// Several of these (wFIFOINPUTWIDTH, aFIFOWIDTH, BURSTLEN, MEMCONWIDTH,
// MEMCONNUMBYTES, DDRSIZEWIDTH, RAMWIDTH, RAMNUMBYTES, RAMSIZEWIDTH) repeat,
// with the same values, definitions made at the top of the file.
// Decimal equivalents of the sized binary literals are given in the comments.
// ---------------------------------------------------------------------------
// rfifo (read-data FIFO): 1024-bit output, 4-bit pointers.
//`define rFIFOINPUTWIDTH 64
`define rFIFOSIZE 256
`define rFIFOSIZEWIDTH 8
`define rFIFOOUTPUTWIDTH 1024
`define rFIFORSIZEWIDTH 4
// wfifo (write-data FIFO): 1024-bit input, 16 entries, 4-bit pointers,
// 64-bit output; wFIFORSIZEWIDTH = 8.
`define wFIFOINPUTWIDTH 12'b010000000000
`define wFIFOSIZE 6'b010000
`define wFIFOSIZEWIDTH 4'b0100
`define wFIFOOUTPUTWIDTH 8'b01000000
`define wFIFORSIZEWIDTH 5'b01000
// addr_fifo: 16 entries, 4-bit pointers, 7-bit data.
//for addr_fifo
`define aFIFOSIZE 6'b010000
`define aFIFOSIZEWIDTH 4'b0100
`define aFIFOWIDTH 4'b0111
// memcmd_fifo: 16 entries, 4-bit pointers.
//for memfifo
`define mFIFOSIZE 16
`define mFIFOSIZEWIDTH 4
//`define mFIFOWIDTH 28
// Burst length 2, held in a 2-bit field.
`define BURSTLEN 3'b010
`define BURSTWIDTH 3'b010
// On-chip data word: 1024 bits = 128 bytes.
`define DATAWIDTH 12'b010000000000
`define DATANUMBYTES 9'b010000000
// Memory-controller data word: 64 bits = 8 bytes.
`define MEMCONWIDTH 8'b01000000
`define MEMCONNUMBYTES 5'b01000
// Off-chip address width: 24 bits.
`define DDRSIZEWIDTH 6'b011000
// Generic FIFO depth 16 with 4-bit count.
`define FIFOSIZE 6'b010000
`define FIFOSIZEWIDTH 4'b0100
// On-chip RAM: 1024-bit words, 128 byte lanes, 7-bit address.
`define RAMWIDTH 12'b010000000000
`define RAMNUMBYTES 9'b010000000
`define RAMSIZEWIDTH 4'b0111
// RATIO = 16 (equals RAMWIDTH / MEMCONWIDTH); RAMLAT = 5 sizes the request
// shift registers in DataTransferUnit.
`define RATIO 6'b010000
`define RAMLAT 4'b0101
// DataTransferUnit FSM state encodings.
`define dIDLE 0
`define dWRITE 1
`define dREAD 2
// DataTransferUnit: moves blocks between a 1024-bit on-chip RAM port and a
// 64-bit external memory interface.
//   Request side : dtu_write_req / dtu_read_req start a transfer when the FSM
//                  is idle; dtu_mem_addr, dtu_ram_addr and dtu_size are
//                  sampled while idle. dtu_ack is high while idle; dtu_done is
//                  high when idle and both wempty and rempty are set.
//   RAM side     : ram_read_addr / ram_read_data feed the write-data FIFO;
//                  ram_write_* are driven from the read-data FIFO.
//   Memory side  : commands are queued in memcmd_fifo and presented on
//                  mem_write_req / mem_read_req / mem_local_addr / mem_size.
// The FIFO modules instantiated here (memcmd_fifo, wfifo, addr_fifo, rfifo)
// are defined elsewhere in the file.
module DataTransferUnit (clk, dtu_write_req, dtu_read_req, dtu_mem_addr, dtu_ram_addr, dtu_size, dtu_ack, dtu_done,
ram_read_addr, ram_read_data, ram_write_byte_en, ram_write_data, ram_write_addr, ram_write_en,
mem_rdata, mem_rdata_valid, mem_ready, mem_wdata_req, reset_n,
burst_begin, mem_local_addr, mem_be, mem_read_req, mem_size, mem_wdata, mem_write_req
);
output burst_begin;
output [`DDRSIZEWIDTH-1:0] mem_local_addr;
output [`MEMCONNUMBYTES-1: 0] mem_be;
output mem_read_req;
output [`BURSTWIDTH-1:0] mem_size;
output [`MEMCONWIDTH-1:0] mem_wdata;
output mem_write_req;
input clk;
input [`MEMCONWIDTH-1:0] mem_rdata;
input mem_rdata_valid;
input mem_ready;
input mem_wdata_req;
input reset_n;
input dtu_write_req;
input dtu_read_req;
input [`DDRSIZEWIDTH-1:0] dtu_mem_addr;
input [`RAMSIZEWIDTH-1:0] dtu_ram_addr;
// Hard-coded 7-bit width; equals `RAMSIZEWIDTH (7) with the current defines.
input [6:0] dtu_size;
output dtu_ack;
output dtu_done;
output[`RAMWIDTH-1:0] ram_write_data;
input[`RAMWIDTH-1:0] ram_read_data;
output[`RAMSIZEWIDTH-1:0] ram_write_addr, ram_read_addr;
output[`RAMNUMBYTES-1:0] ram_write_byte_en;
output ram_write_en;
// External-memory address pipeline: mem_addr5 is the generator, mem_addr4..0
// are delay stages; mem_addr0 is the value placed in the command word.
reg[`DDRSIZEWIDTH-1:0] mem_addr0;
reg[`DDRSIZEWIDTH-1:0] mem_addr1;
reg[`DDRSIZEWIDTH-1:0] mem_addr2;
reg[`DDRSIZEWIDTH-1:0] mem_addr3;
reg[`DDRSIZEWIDTH-1:0] mem_addr4;
reg[`DDRSIZEWIDTH-1:0] mem_addr5;
reg [1:0] state;
wire [`DATAWIDTH-1:0] rdata, ram_write_dataw, ram_read_dataw;
wire [`RAMSIZEWIDTH-1:0] rfifo_addr;
// Request shift registers: bit `RAMLAT-1 is written by the generator below,
// bit 0 is the value actually used.
reg [`RAMLAT-1:0]fifo_write_reg;
reg [`RAMLAT-1:0]write_req_reg;
reg [`RAMLAT-1:0]read_req_reg;
reg [0:0]fifo_read_reg;
// rdata_valid and test_complete_reg are declared but not referenced anywhere
// else in this module.
reg rdata_valid;
reg [1:0]test_complete_reg;
// Burst-size pipeline: size_count4 is the generator, size_count0 goes into
// the command word.
reg [`BURSTWIDTH-1:0] size_count0;
reg [`BURSTWIDTH-1:0] size_count1;
reg [`BURSTWIDTH-1:0] size_count2;
reg [`BURSTWIDTH-1:0] size_count3;
reg [`BURSTWIDTH-1:0] size_count4;
reg [`RAMSIZEWIDTH-1:0] size;
// RAM address pipeline: ram_addr4 is the generator and the RAM read address;
// ram_addr3 is what gets pushed into the address FIFO.
reg [`RAMSIZEWIDTH-1:0]ram_addr0;
reg [`RAMSIZEWIDTH-1:0]ram_addr1;
reg [`RAMSIZEWIDTH-1:0]ram_addr2;
reg [`RAMSIZEWIDTH-1:0]ram_addr3;
reg [`RAMSIZEWIDTH-1:0]ram_addr4;
reg [4:0] data_count;
reg ram_write_en_reg;
wire read_req;
wire write_req;
wire [`FIFOSIZEWIDTH-1:0] wfifo_count;
// rdcmd_empty is only mentioned in commented-out code below.
wire rfull, wempty, rempty, rdcmd_empty, wrcmd_full, wrcmd_empty, rdata_empty;
wire [`DATAWIDTH-1:0] mem_data;
wire not_stall;
wire fifo_write, fifo_read;
wire rdata_req;
wire [`BURSTWIDTH+`DDRSIZEWIDTH+1:0] wrmem_cmd, rdmem_cmd;
wire mem_cmd_ready, mem_cmd_issue;
// FIFOs to interact with off-chip memory
// Command FIFO: wrmem_cmd is pushed on mem_cmd_issue, rdmem_cmd is popped on
// mem_cmd_ready.
memcmd_fifo cmd_store(
//.aclr(~reset_n),
//.rdclk(phy_clk),
.clk(clk),
.data(wrmem_cmd),
.rdreq(mem_cmd_ready),
//.rdempty(rdcmd_empty),
.wrreq(mem_cmd_issue),
.full(wrcmd_full),
.empty(wrcmd_empty),
.q(rdmem_cmd)
);
// Write-data FIFO: 1024-bit RAM words in, 64-bit mem_wdata out, popped by
// mem_wdata_req.
wfifo wdata_store(
//.rdclk(phy_clk),
.clk(clk),
.data(mem_data),
.rdreq(mem_wdata_req),
.wrreq(fifo_write),
.empty(wempty),
.q(mem_wdata),
.usedw(wfifo_count)
);
// Address FIFO: stores the RAM address (ram_addr3) for each read issued, and
// is popped together with the read-data FIFO to give ram_write_addr.
addr_fifo raddress_store (
.clk(clk),
.data(ram_addr3),
.wrreq(fifo_read),
.rdreq(rdata_req),
.empty(rempty),
.full(rfull),
.q(rfifo_addr)
);
// Read-data FIFO: 64-bit mem_rdata in on mem_rdata_valid, 1024-bit rdata out.
rfifo rdata_store(
.clk(clk),
.data(mem_rdata),
.rdreq(rdata_req),
//.wrclk(phy_clk),
.wrreq(mem_rdata_valid),
.empty(rdata_empty),
.q(rdata)
);
assign mem_cmd_ready = (mem_ready == 1'b1);// && (rdcmd_empty == 0);
// A command word is pushed whenever the command FIFO is not full and either a
// request is pending or the FIFO is empty (in which case the pushed word may
// carry no request bits).
assign mem_cmd_issue = (wrcmd_full == 1'b0) && (write_req == 1 || read_req == 1'b1 || wrcmd_empty == 1'b1);
// Command word layout: [27:26] burst size, [25:2] address, [1] read, [0] write.
// The hard-coded [27:26] matches `BURSTWIDTH+`DDRSIZEWIDTH+1 : `DDRSIZEWIDTH+2
// for the current defines (the same slice is decoded with macros below).
assign wrmem_cmd[27:26] = size_count0;
assign wrmem_cmd[`DDRSIZEWIDTH+1:2] = mem_addr0;
assign wrmem_cmd[1] = read_req;
assign wrmem_cmd[0] = write_req;
assign mem_write_req = rdmem_cmd[0];// && rdcmd_empty == 0;
assign mem_read_req = rdmem_cmd[1];// && rdcmd_empty == 0;
assign mem_local_addr = rdmem_cmd[`DDRSIZEWIDTH+1:2];
// burst_begin is tied low and all memory byte enables are tied high.
assign burst_begin = 0;
assign mem_size = rdmem_cmd[`BURSTWIDTH+`DDRSIZEWIDTH+1:`DDRSIZEWIDTH+2];
assign mem_be = ~0;
assign fifo_write = fifo_write_reg[0];
// Requests are suppressed while stalled.
assign write_req = (not_stall) ? write_req_reg[0] : 0;
assign read_req = (not_stall) ? read_req_reg[0] : 0;
assign fifo_read = (not_stall) ? fifo_read_reg[0] : 0;
// Stall when the write FIFO has fewer than 5 free entries (count >= 16-5),
// the address FIFO is full, or the command FIFO is full.
assign not_stall = (wfifo_count < `FIFOSIZE-5) && (rfull == 0) && (wrcmd_full == 0);
assign dtu_ack = (state == `dIDLE);
assign dtu_done = (state == `dIDLE) && wempty && rempty;
// 64-bit word reversal between the RAM word and the FIFO word, in both
// directions: 64-bit word k on one side is word 15-k on the other.
assign ram_write_dataw[63:0] = rdata[1023:960];
assign mem_data[63:0] = ram_read_dataw[1023:960];
assign ram_write_dataw[127:64] = rdata[959:896];
assign mem_data[127:64] = ram_read_dataw[959:896];
assign ram_write_dataw[191:128] = rdata[895:832];
assign mem_data[191:128] = ram_read_dataw[895:832];
assign ram_write_dataw[255:192] = rdata[831:768];
assign mem_data[255:192] = ram_read_dataw[831:768];
assign ram_write_dataw[319:256] = rdata[767:704];
assign mem_data[319:256] = ram_read_dataw[767:704];
assign ram_write_dataw[383:320] = rdata[703:640];
assign mem_data[383:320] = ram_read_dataw[703:640];
assign ram_write_dataw[447:384] = rdata[639:576];
assign mem_data[447:384] = ram_read_dataw[639:576];
assign ram_write_dataw[511:448] = rdata[575:512];
assign mem_data[511:448] = ram_read_dataw[575:512];
assign ram_write_dataw[575:512] = rdata[511:448];
assign mem_data[575:512] = ram_read_dataw[511:448];
assign ram_write_dataw[639:576] = rdata[447:384];
assign mem_data[639:576] = ram_read_dataw[447:384];
assign ram_write_dataw[703:640] = rdata[383:320];
assign mem_data[703:640] = ram_read_dataw[383:320];
assign ram_write_dataw[767:704] = rdata[319:256];
assign mem_data[767:704] = ram_read_dataw[319:256];
assign ram_write_dataw[831:768] = rdata[255:192];
assign mem_data[831:768] = ram_read_dataw[255:192];
assign ram_write_dataw[895:832] = rdata[191:128];
assign mem_data[895:832] = ram_read_dataw[191:128];
assign ram_write_dataw[959:896] = rdata[127:64];
assign mem_data[959:896] = ram_read_dataw[127:64];
assign ram_write_dataw[1023:960] = rdata[63:0];
assign mem_data[1023:960] = ram_read_dataw[63:0];
assign ram_write_data = ram_write_dataw[1023:0];
assign ram_read_dataw[1023:0] = ram_read_data;
// RAM write side is driven from the read path: address from the address FIFO,
// all byte lanes enabled, enable is rdata_req delayed by one clock.
assign ram_write_addr = rfifo_addr;
assign ram_read_addr = ram_addr4;
assign ram_write_byte_en = ~0;
assign ram_write_en = ram_write_en_reg;
// The read-data FIFO is popped on every clock on which it is not empty.
assign rdata_req = !rdata_empty;
// FSM to produce off-chip memory commands
// Idle -> write or read on request (write has priority); both active states
// return to idle once not stalled, size has reached 0 and fewer than
// `BURSTLEN beats remain in data_count. Synchronous active-low reset.
always @ (posedge clk)
begin
if (reset_n == 1'b0)
begin
state <= `dIDLE;
end
else
begin
case (state)
`dIDLE:
begin
if (dtu_write_req)
state <= `dWRITE;
else if (dtu_read_req)
state <= `dREAD;
else
state <= `dIDLE;
end
`dWRITE:
begin
if (not_stall && size == 0 && data_count < `BURSTLEN)
state <= `dIDLE;
else
state <= `dWRITE;
end
`dREAD:
begin
if (not_stall && size == 0 && data_count < `BURSTLEN)
state <= `dIDLE;
else
state <= `dREAD;
end
default:
begin
state <= `dIDLE;
end
endcase
end
end
// Address / request generator. Writes the head of each pipeline
// (mem_addr5, ram_addr4, size_count4, bit `RAMLAT-1 of the request registers).
// Branches are prioritised top to bottom.
always @ (posedge clk)
begin
if (reset_n == 0)
begin
size <= 0;
data_count <= 0;
size_count4 <= 1;
mem_addr5 <= 0;
ram_addr4 <= 0;
fifo_write_reg[`RAMLAT-1] <= 0;
write_req_reg[`RAMLAT-1] <= 0;
fifo_read_reg[0] <= 0;
read_req_reg[`RAMLAT-1] <= 0;
end
// Idle: capture the transfer parameters and clear all requests.
else if (state == `dIDLE)
begin
size <= dtu_size;
size_count4 <= `BURSTLEN;
mem_addr5 <= dtu_mem_addr;
ram_addr4 <= dtu_ram_addr;
fifo_write_reg[`RAMLAT-1] <= 1'b0;
write_req_reg[`RAMLAT-1] <= 1'b0;
fifo_read_reg[0] <= 1'b0;
read_req_reg[`RAMLAT-1] <= 1'b0;
data_count <= 0;
end
// Beats of the current RAM word remain: issue another burst command without
// touching the RAM / data FIFOs.
else if (data_count >= `BURSTLEN && not_stall)
begin
data_count <= data_count - `BURSTLEN;
mem_addr5 <= mem_addr5 + `BURSTLEN;
fifo_write_reg[`RAMLAT-1] <= 1'b0;
write_req_reg[`RAMLAT-1] <= state == `dWRITE;
fifo_read_reg[0] <= 0;
read_req_reg[`RAMLAT-1] <= state == `dREAD;
end
// Nothing left to transfer: clear all requests.
else if (size == 0 && data_count == 0 && not_stall==1'b1)
begin
fifo_write_reg[`RAMLAT-1] <= 0;
write_req_reg[`RAMLAT-1] <= 0;
fifo_read_reg[0] <= 0;
read_req_reg[`RAMLAT-1] <= 0;
end
// No RAM words left but a remainder smaller than `BURSTLEN is pending: issue
// one last command whose size is the low bits of data_count.
else if (size == 0 && not_stall==1'b1)
begin
size_count4 <= data_count[`BURSTWIDTH-1:0];
fifo_write_reg[`RAMLAT-1] <= 0;
write_req_reg[`RAMLAT-1] <= state == `dWRITE;
fifo_read_reg[0] <= 0;
read_req_reg[`RAMLAT-1] <= state == `dREAD;
end
// Start the next RAM word: advance both addresses, add `RATIO beats minus
// the burst issued now, and request a FIFO access for the word.
else if (not_stall==1'b1)
begin
size <= size - 1;
data_count <= data_count + `RATIO - `BURSTLEN;
mem_addr5 <= mem_addr5 + `BURSTLEN;
ram_addr4 <= ram_addr4+1;
fifo_write_reg[`RAMLAT-1] <= state == `dWRITE;
write_req_reg[`RAMLAT-1] <= state == `dWRITE;
fifo_read_reg[0] <= state == `dREAD;
read_req_reg[`RAMLAT-1] <= state == `dREAD;
end
// Stalled: only the FIFO-write request head is cleared; everything else holds.
else
begin
fifo_write_reg[`RAMLAT-1] <= 0;
end
end
// Delay pipelines. Two independent if-statements followed by one
// unconditional register.
always @ (posedge clk)
begin
// fifo_write_reg[3:0] shifts on every clock, regardless of not_stall.
if (reset_n == 0)
begin
fifo_write_reg[0] <= 1'b0;
fifo_write_reg[1] <= 1'b0;
fifo_write_reg[2] <= 1'b0;
fifo_write_reg[3] <= 1'b0;
end
else
begin
fifo_write_reg[0] <= fifo_write_reg[1];
fifo_write_reg[1] <= fifo_write_reg[2];
fifo_write_reg[2] <= fifo_write_reg[3];
fifo_write_reg[3] <= fifo_write_reg[4];
end
// Address, size and request pipelines: reset synchronously, and advance one
// stage only on clocks where not_stall is high (they hold while stalled).
if (reset_n == 1'b0)
begin
mem_addr0 <= 0;
ram_addr0 <= 0;
size_count0 <= 1;
write_req_reg[0] <= 0;
read_req_reg[0] <= 0;
mem_addr1 <= 0;
ram_addr1 <= 0;
size_count1 <= 1;
write_req_reg[1] <= 0;
read_req_reg[1] <= 0;
mem_addr2 <= 0;
ram_addr2 <= 0;
size_count2 <= 1;
write_req_reg[2] <= 0;
read_req_reg[2] <= 0;
mem_addr3 <= 0;
ram_addr3 <= 0;
size_count3 <= 1;
write_req_reg[3] <= 0;
read_req_reg[3] <= 0;
mem_addr4 <= 0;
end
else if (not_stall)
begin
size_count0 <= size_count1;
mem_addr0 <= mem_addr1;
ram_addr0 <= ram_addr1;
write_req_reg[0] <= write_req_reg[1];
read_req_reg[0] <= read_req_reg[1];
size_count1 <= size_count2;
mem_addr1 <= mem_addr2;
ram_addr1 <= ram_addr2;
write_req_reg[1] <= write_req_reg[2];
read_req_reg[1] <= read_req_reg[2];
size_count2 <= size_count3;
mem_addr2 <= mem_addr3;
ram_addr2 <= ram_addr3;
write_req_reg[2] <= write_req_reg[3];
read_req_reg[2] <= read_req_reg[3];
size_count3 <= size_count4;
mem_addr3 <= mem_addr4;
ram_addr3 <= ram_addr4;
write_req_reg[3] <= write_req_reg[4];
read_req_reg[3] <= read_req_reg[4];
mem_addr4 <= mem_addr5;
end
// RAM write enable is rdata_req delayed by one clock; this register has no
// reset.
ram_write_en_reg <= rdata_req;
end
endmodule
// rfifo: read-data FIFO between the 64-bit memory interface and the
// 1024-bit on-chip data path.
//   clk   : clock.
//   data  : 64-bit input word, written at wr_pointer when wrreq is high.
//   rdreq : advances rd_pointer and restarts the output word counter.
//   wrreq : advances wr_pointer.
//   empty : high while the occupancy counter is zero.
//   q     : 1024-bit output, filled 64 bits per clock: `counter` selects the
//           slice that receives data_ram; it is cleared by rdreq and otherwise
//           increments (wrapping after 15).
// Storage is the dual_port_ram_rfifo primitive (defined elsewhere). data_ram
// is taken from out1, i.e. the port addressed by wr_pointer; out2 (addressed
// by rd_pointer) is left on junk_output, as in the original design.
// None of the registers has a reset, as in the original design.
//
// Fix relative to the original: the overflow guard compared the 5-bit
// status_cnt against 64, which can never match, so the counter could wrap
// through 31 back to 0 and report `empty` falsely. It now saturates at the
// depth addressable by the pointers (1 << `rFIFORSIZEWIDTH), matching the
// guard used by the sibling wfifo.
module rfifo (
    clk,
    data,
    rdreq,
    wrreq,
    empty,
    q
);
input                            clk;
input                            wrreq;
input                            rdreq;
input  [`rFIFOINPUTWIDTH-1:0]    data;
output                           empty;
output [`rFIFOOUTPUTWIDTH-1:0]   q;

reg  [`rFIFORSIZEWIDTH-1:0]  wr_pointer;
reg  [`rFIFORSIZEWIDTH-1:0]  rd_pointer;
reg  [`rFIFORSIZEWIDTH:0]    status_cnt;   // occupancy, one bit wider than the pointers
reg  [`rFIFOOUTPUTWIDTH-1:0] q;
reg  [3:0]                   counter;      // selects the 64-bit slice of q to load
wire [`rFIFOINPUTWIDTH-1:0]  data_ram;
wire [`rFIFOINPUTWIDTH-1:0]  junk_input;
wire [`rFIFOINPUTWIDTH-1:0]  junk_output;

assign empty = (status_cnt == 0);
assign junk_input = 64'b0;

// Write pointer.
always @ (posedge clk)
begin
    if (wrreq)
    begin
        wr_pointer <= wr_pointer + 1'b1;
    end
end

// Read pointer.
always @ (posedge clk)
begin
    if (rdreq)
    begin
        rd_pointer <= rd_pointer + 2'b01;
    end
end

// Output word assembly: one 64-bit slice of q is loaded per clock.
always @ (posedge clk)
begin
    if (rdreq)
        counter <= 0;
    else
        counter <= counter + 2'b01;

    case (counter)
        4'd0:  q[`rFIFOINPUTWIDTH-1:0] <= data_ram;
        4'd1:  q[127:64]    <= data_ram;
        4'd2:  q[191:128]   <= data_ram;
        4'd3:  q[255:192]   <= data_ram;
        4'd4:  q[319:256]   <= data_ram;
        4'd5:  q[383:320]   <= data_ram;
        4'd6:  q[447:384]   <= data_ram;
        4'd7:  q[511:448]   <= data_ram;
        4'd8:  q[575:512]   <= data_ram;
        4'd9:  q[639:576]   <= data_ram;
        4'd10: q[703:640]   <= data_ram;
        4'd11: q[767:704]   <= data_ram;
        4'd12: q[831:768]   <= data_ram;
        4'd13: q[895:832]   <= data_ram;
        4'd14: q[959:896]   <= data_ram;
        4'd15: q[1023:960]  <= data_ram;
    endcase
end

// Occupancy counter: down on read-only, up on write-only, unchanged when
// both or neither occur. Saturates at 0 and at the pointer depth.
always @ (posedge clk)
begin
    if ((rdreq) && (!wrreq) && (status_cnt != 0))
        status_cnt <= status_cnt - 1'b1;
    // Write but no read.
    else if ((wrreq) && (!rdreq) && (status_cnt != (1 << `rFIFORSIZEWIDTH)))
        status_cnt <= status_cnt + 1'b1;
end

dual_port_ram_rfifo ram_addr(
    .we1 (wrreq) , // write enable
    .we2 (rdreq) , // Read enable
    .addr1 (wr_pointer) , // address_0 input
    .addr2 (rd_pointer) , // address_q input
    .data1 (data) , // data_0 bi-directional
    .data2 (junk_input), // data_1 bi-directional
    .clk(clk),
    .out1 (data_ram),
    .out2 (junk_output)
);
endmodule
// synopsys translate_off
//`timescale 1 ps / 1 ps
// synopsys translate_on
// wfifo -- FIFO wrapper whose stored words are `wFIFOINPUTWIDTH bits wide and
// which presents the RAM output on q one 64-bit slice per clock.
//
// Ports:
//   clk    rising edge updates every register in this module
//   data   word handed to the RAM when wrreq is high
//   wrreq  write request: advances the write pointer
//   rdreq  read request: advances the read pointer and restarts the slice
//          selector at slice 0
//   empty  high while the occupancy counter is zero
//   q      registered slice of the RAM output picked by the slice selector
//   usedw  low `wFIFOSIZEWIDTH bits of the occupancy counter
//
// No register here has a reset; initial values are whatever the simulator or
// the device provides.
module wfifo (
    clk,
    data,
    rdreq,
    wrreq,
    empty,
    q,
    usedw
);
    input                          clk;
    input                          wrreq;
    input                          rdreq;
    input  [`wFIFOINPUTWIDTH-1:0]  data;
    output                         empty;
    output [`wFIFOOUTPUTWIDTH-1:0] q;
    output [`wFIFOSIZEWIDTH-1:0]   usedw;

    //----------- Internal state -------------------
    reg  [`wFIFOOUTPUTWIDTH-1:0] q;
    reg  [`wFIFOSIZEWIDTH-1:0]   wr_ptr;     // next RAM address to write
    reg  [`wFIFOSIZEWIDTH-1:0]   rd_ptr;     // next RAM address to read
    reg  [`wFIFOSIZEWIDTH:0]     fill_cnt;   // occupancy, one bit wider than the pointers
    reg  [3:0]                   slice_sel;  // which 64-bit slice of data_ram goes to q

    wire [`wFIFOINPUTWIDTH-1:0]  data_ram;
    wire [`wFIFOINPUTWIDTH-1:0]  junk_input;   // constant zero for the unused RAM data port
    wire [`wFIFOINPUTWIDTH-1:0]  junk_output;  // discarded RAM output

    assign junk_input = 1024'b0;
    assign empty      = (fill_cnt == 5'b00000);
    assign usedw      = fill_cnt[`wFIFOSIZEWIDTH-1:0];

    // Write and read pointers: each advances by one on its own request and
    // wraps naturally at the register width.
    always @ (posedge clk)
    begin
        if (wrreq)
            wr_ptr <= wr_ptr + 1'b1;
        if (rdreq)
            rd_ptr <= rd_ptr + 1'b1;
    end

    // Slice selector and output register.  The selector restarts on rdreq and
    // otherwise free-runs (wrapping after 15); q is loaded every clock from
    // the slice chosen by the selector's current (pre-update) value.
    always @ (posedge clk)
    begin
        if (rdreq)
            slice_sel <= 4'b0000;
        else
            slice_sel <= slice_sel + 1'b1;

        case (slice_sel)
            4'd0:  q <= data_ram[63:0];
            4'd1:  q <= data_ram[127:64];
            4'd2:  q <= data_ram[191:128];
            4'd3:  q <= data_ram[255:192];
            4'd4:  q <= data_ram[319:256];
            4'd5:  q <= data_ram[383:320];
            4'd6:  q <= data_ram[447:384];
            4'd7:  q <= data_ram[511:448];
            4'd8:  q <= data_ram[575:512];
            4'd9:  q <= data_ram[639:576];
            4'd10: q <= data_ram[703:640];
            4'd11: q <= data_ram[767:704];
            4'd12: q <= data_ram[831:768];
            4'd13: q <= data_ram[895:832];
            4'd14: q <= data_ram[959:896];
            4'd15: q <= data_ram[1023:960];
        endcase
    end

    // Occupancy counter: down on read-only, up on write-only, unchanged when
    // both or neither request is active.  Saturates at 0 and at 16.
    always @ (posedge clk)
    begin
        if (rdreq && !wrreq && (fill_cnt != 5'b00000))
            fill_cnt <= fill_cnt - 1'b1;
        else if (wrreq && !rdreq && (fill_cnt != 5'b10000))
            fill_cnt <= fill_cnt + 1'b1;
    end

    // Backing storage.
    // NOTE(review): data_ram is taken from out1, the port addressed by the
    // write pointer, while the port addressed by rd_ptr feeds junk_output --
    // confirm against dual_port_ram_wfifo that this is intended.
    dual_port_ram_wfifo ram_addr(
        .we1   (wrreq),
        .we2   (rdreq),
        .addr1 (wr_ptr),
        .addr2 (rd_ptr),
        .data1 (data),
        .data2 (junk_input),
        .clk   (clk),
        .out1  (data_ram),
        .out2  (junk_output)
    );
endmodule
// synopsys translate_off
//`timescale 1 ps / 1 ps
// synopsys translate_on
// addr_fifo -- small synchronous FIFO for `aFIFOWIDTH-bit entries.
//
// Ports:
//   clk    rising edge updates every register in this module
//   data   entry handed to the RAM when wrreq is high
//   wrreq  write request: advances the write pointer
//   rdreq  read request: advances the read pointer and loads q
//   empty  high while the occupancy counter is zero
//   full   high while the occupancy counter equals 15 (the counter itself
//          is allowed to reach 16 before it saturates)
//   q      RAM output captured on the last clock where rdreq was high
//
// No register here has a reset.
module addr_fifo (
    clk,
    data,
    wrreq,
    rdreq,
    empty,
    full,
    q
);
    input                    clk;
    input  [`aFIFOWIDTH-1:0] data;
    input                    rdreq;
    input                    wrreq;
    output                   empty;
    output                   full;
    output [`aFIFOWIDTH-1:0] q;

    reg  [`aFIFOWIDTH-1:0]     q;
    reg  [`aFIFOSIZEWIDTH-1:0] wr_ptr;    // next RAM address to write
    reg  [`aFIFOSIZEWIDTH-1:0] rd_ptr;    // next RAM address to read
    reg  [`aFIFOSIZEWIDTH:0]   fill_cnt;  // occupancy, one bit wider than the pointers

    wire [`aFIFOWIDTH-1:0] data_ram;
    wire [`aFIFOWIDTH-1:0] junk_input;    // constant zero for the unused RAM data port
    wire [`aFIFOWIDTH-1:0] junk_output;   // discarded RAM output

    assign junk_input = 7'b0000000;
    assign empty      = (fill_cnt == 5'b00000);
    assign full       = (fill_cnt == 5'b01111);

    // Pointers and output register.
    always @ (posedge clk)
    begin
        if (wrreq)
            wr_ptr <= wr_ptr + 1'b1;
        if (rdreq)
        begin
            rd_ptr <= rd_ptr + 1'b1;
            q      <= data_ram;
        end
    end

    // Occupancy counter: down on read-only, up on write-only, unchanged when
    // both or neither request is active.  Saturates at 0 and at 16.
    always @ (posedge clk)
    begin
        if (rdreq && !wrreq && (fill_cnt != 5'b00000))
            fill_cnt <= fill_cnt - 1'b1;
        else if (wrreq && !rdreq && (fill_cnt != 5'b10000))
            fill_cnt <= fill_cnt + 1'b1;
    end

    // Backing storage.
    // NOTE(review): data_ram is taken from out1, the port addressed by the
    // write pointer, while the port addressed by rd_ptr feeds junk_output --
    // confirm against dual_port_ram_afifo that this is intended.
    dual_port_ram_afifo ram_addr(
        .we1   (wrreq),
        .we2   (rdreq),
        .addr1 (wr_ptr),
        .addr2 (rd_ptr),
        .data1 (data),
        .data2 (junk_input),
        .clk   (clk),
        .out1  (data_ram),
        .out2  (junk_output)
    );
endmodule
// memcmd_fifo -- small synchronous FIFO for `mFIFOWIDTH-bit entries.
//
// Ports:
//   clk    rising edge updates every register in this module
//   data   entry handed to the RAM when wrreq is high
//   wrreq  write request: advances the write pointer
//   rdreq  read request: advances the read pointer and loads q
//   full   high while the occupancy counter equals 15 (the counter itself
//          is allowed to reach 16 before it saturates)
//   empty  high while the occupancy counter is zero
//   q      RAM output captured on the last clock where rdreq was high
//
// No register here has a reset.
module memcmd_fifo (
    clk,
    data,
    rdreq,
    wrreq,
    full,
    empty,
    q
);
    input                    clk;
    input  [`mFIFOWIDTH-1:0] data;
    input                    wrreq;
    input                    rdreq;
    output                   full;
    output                   empty;
    output [`mFIFOWIDTH-1:0] q;

    reg  [`mFIFOWIDTH-1:0]     q;
    reg  [`mFIFOSIZEWIDTH-1:0] wr_ptr;    // next RAM address to write
    reg  [`mFIFOSIZEWIDTH-1:0] rd_ptr;    // next RAM address to read
    reg  [`mFIFOSIZEWIDTH:0]   fill_cnt;  // occupancy, one bit wider than the pointers

    wire [`mFIFOWIDTH-1:0] data_ram;
    wire [`mFIFOWIDTH-1:0] junk_input;    // constant zero for the unused RAM data port
    wire [`mFIFOWIDTH-1:0] junk_output;   // discarded RAM output

    assign junk_input = 28'b0000000000000000000000000000;
    assign full       = (fill_cnt == 5'b01111);
    assign empty      = (fill_cnt == 5'b00000);

    // Pointers and output register.
    always @ (posedge clk)
    begin
        if (wrreq)
            wr_ptr <= wr_ptr + 1'b1;
        if (rdreq)
        begin
            rd_ptr <= rd_ptr + 1'b1;
            q      <= data_ram;
        end
    end

    // Occupancy counter: down on read-only, up on write-only, unchanged when
    // both or neither request is active.  Saturates at 0 and at 16.
    always @ (posedge clk)
    begin
        if (rdreq && !wrreq && (fill_cnt != 0))
            fill_cnt <= fill_cnt - 1'b1;
        else if (wrreq && !rdreq && (fill_cnt != 16))
            fill_cnt <= fill_cnt + 1'b1;
    end

    // Backing storage.
    // NOTE(review): data_ram is taken from out1, the port addressed by the
    // write pointer, while the port addressed by rd_ptr feeds junk_output --
    // confirm against dual_port_ram_mfifo that this is intended.
    dual_port_ram_mfifo ram_addr(
        .we1   (wrreq),
        .we2   (rdreq),
        .addr1 (wr_ptr),
        .addr2 (rd_ptr),
        .data1 (data),
        .data2 (junk_input),
        .clk   (clk),
        .out1  (data_ram),
        .out2  (junk_output)
    );
endmodule
// 8-bit binary constants for the integers 0 through 24.
// fpu_add (below) uses `ONE .. `TWENTYFOUR both as case labels for the
// exponent difference and as the matching constant shift amounts when it
// aligns the two mantissas.
`define ZERO 8'b00000000
`define ONE 8'b00000001
`define TWO 8'b00000010
`define THREE 8'b00000011
`define FOUR 8'b00000100
`define FIVE 8'b00000101
`define SIX 8'b00000110
`define SEVEN 8'b00000111
`define EIGHT 8'b00001000
`define NINE 8'b00001001
`define TEN 8'b00001010
`define ELEVEN 8'b00001011
`define TWELVE 8'b00001100
`define THIRTEEN 8'b00001101
`define FOURTEEN 8'b00001110
`define FIFTEEN 8'b00001111
`define SIXTEEN 8'b00010000
`define SEVENTEEN 8'b00010001
`define EIGHTEEN 8'b00010010
`define NINETEEN 8'b00010011
`define TWENTY 8'b00010100
`define TWENTYONE 8'b00010101
`define TWENTYTWO 8'b00010110
`define TWENTYTHREE 8'b00010111
`define TWENTYFOUR 8'b00011000
// fpu_add -- simple 32-bit floating-point adder (1 sign / 8 exponent /
// 23 fraction bits, with an implied leading 1 on every operand).
//
// Ports:
//   clock  rising edge registers the operands (one cycle of input latency)
//   a1,b1  operands
//   sum    result; combinational from the registered operands
//
// Steps, all combinational after the input registers:
//   1. align : the operand with the smaller exponent has its 24-bit mantissa
//              shifted right by the exponent difference (differences above
//              24 are treated as 24)
//   2. add   : mantissas are added when the signs match, otherwise the
//              smaller mantissa is subtracted from the larger one
//   3. pack  : the 25-bit result is renormalized and the exponent adjusted
//
// The visible logic has no special handling for zero, denormal, infinity or
// NaN encodings, and no rounding: shifted-out bits are simply dropped.
module fpu_add (clock, a1, b1, sum);
    input         clock;
    input  [31:0] a1;
    input  [31:0] b1;
    output [31:0] sum;
    reg    [31:0] sum;

    // Registered copies of the operands.
    reg [31:0] a;
    reg [31:0] b;

    // Working values.
    reg [7:0]  a_exp;    // exponent shared by both mantissas after alignment
    reg [23:0] a_man;    // aligned mantissa of a (implied 1 restored)
    reg [23:0] b_man;    // aligned mantissa of b (implied 1 restored)
    reg [7:0]  temp;     // absolute exponent difference
    reg [24:0] sum_man;  // mantissa result, one extra bit for the carry
    reg        smaller;  // 1 when a_man < b_man, 0 otherwise

    always @ (posedge clock) begin
        a <= a1;
        b <= b1;
    end

    // ---- 1. alignment ----------------------------------------------------
    // The shift is spelled out as a case of constant shifts rather than a
    // single variable shift.
    always @ (a or b) begin
        if (a[30:23] < b[30:23]) begin
            // a has the smaller exponent: shift a, keep b.
            temp  = b[30:23] - a[30:23];
            b_man = {1'b1, b[22:0]};
            a_exp = b[30:23];
            case (temp)
                `ONE:         a_man = {1'b1, a[22:0]} >> `ONE;
                `TWO:         a_man = {1'b1, a[22:0]} >> `TWO;
                `THREE:       a_man = {1'b1, a[22:0]} >> `THREE;
                `FOUR:        a_man = {1'b1, a[22:0]} >> `FOUR;
                `FIVE:        a_man = {1'b1, a[22:0]} >> `FIVE;
                `SIX:         a_man = {1'b1, a[22:0]} >> `SIX;
                `SEVEN:       a_man = {1'b1, a[22:0]} >> `SEVEN;
                `EIGHT:       a_man = {1'b1, a[22:0]} >> `EIGHT;
                `NINE:        a_man = {1'b1, a[22:0]} >> `NINE;
                `TEN:         a_man = {1'b1, a[22:0]} >> `TEN;
                `ELEVEN:      a_man = {1'b1, a[22:0]} >> `ELEVEN;
                `TWELVE:      a_man = {1'b1, a[22:0]} >> `TWELVE;
                `THIRTEEN:    a_man = {1'b1, a[22:0]} >> `THIRTEEN;
                `FOURTEEN:    a_man = {1'b1, a[22:0]} >> `FOURTEEN;
                `FIFTEEN:     a_man = {1'b1, a[22:0]} >> `FIFTEEN;
                `SIXTEEN:     a_man = {1'b1, a[22:0]} >> `SIXTEEN;
                `SEVENTEEN:   a_man = {1'b1, a[22:0]} >> `SEVENTEEN;
                `EIGHTEEN:    a_man = {1'b1, a[22:0]} >> `EIGHTEEN;
                `NINETEEN:    a_man = {1'b1, a[22:0]} >> `NINETEEN;
                `TWENTY:      a_man = {1'b1, a[22:0]} >> `TWENTY;
                `TWENTYONE:   a_man = {1'b1, a[22:0]} >> `TWENTYONE;
                `TWENTYTWO:   a_man = {1'b1, a[22:0]} >> `TWENTYTWO;
                `TWENTYTHREE: a_man = {1'b1, a[22:0]} >> `TWENTYTHREE;
                // Twenty-four or more: shift by twenty-four (boundary case).
                default:      a_man = {1'b1, a[22:0]} >> `TWENTYFOUR;
            endcase
        end else if (a[30:23] > b[30:23]) begin
            // b has the smaller exponent: shift b, keep a.
            temp  = a[30:23] - b[30:23];
            a_man = {1'b1, a[22:0]};
            a_exp = a[30:23];
            case (temp)
                `ONE:         b_man = {1'b1, b[22:0]} >> `ONE;
                `TWO:         b_man = {1'b1, b[22:0]} >> `TWO;
                `THREE:       b_man = {1'b1, b[22:0]} >> `THREE;
                `FOUR:        b_man = {1'b1, b[22:0]} >> `FOUR;
                `FIVE:        b_man = {1'b1, b[22:0]} >> `FIVE;
                `SIX:         b_man = {1'b1, b[22:0]} >> `SIX;
                `SEVEN:       b_man = {1'b1, b[22:0]} >> `SEVEN;
                `EIGHT:       b_man = {1'b1, b[22:0]} >> `EIGHT;
                `NINE:        b_man = {1'b1, b[22:0]} >> `NINE;
                `TEN:         b_man = {1'b1, b[22:0]} >> `TEN;
                `ELEVEN:      b_man = {1'b1, b[22:0]} >> `ELEVEN;
                `TWELVE:      b_man = {1'b1, b[22:0]} >> `TWELVE;
                `THIRTEEN:    b_man = {1'b1, b[22:0]} >> `THIRTEEN;
                `FOURTEEN:    b_man = {1'b1, b[22:0]} >> `FOURTEEN;
                `FIFTEEN:     b_man = {1'b1, b[22:0]} >> `FIFTEEN;
                `SIXTEEN:     b_man = {1'b1, b[22:0]} >> `SIXTEEN;
                `SEVENTEEN:   b_man = {1'b1, b[22:0]} >> `SEVENTEEN;
                `EIGHTEEN:    b_man = {1'b1, b[22:0]} >> `EIGHTEEN;
                `NINETEEN:    b_man = {1'b1, b[22:0]} >> `NINETEEN;
                `TWENTY:      b_man = {1'b1, b[22:0]} >> `TWENTY;
                `TWENTYONE:   b_man = {1'b1, b[22:0]} >> `TWENTYONE;
                `TWENTYTWO:   b_man = {1'b1, b[22:0]} >> `TWENTYTWO;
                `TWENTYTHREE: b_man = {1'b1, b[22:0]} >> `TWENTYTHREE;
                // Twenty-four or more: shift by twenty-four (boundary case).
                default:      b_man = {1'b1, b[22:0]} >> `TWENTYFOUR;
            endcase
        end else begin
            // Equal exponents: nothing to shift.
            temp  = 8'b0;
            a_man = {1'b1, a[22:0]};
            b_man = {1'b1, b[22:0]};
            a_exp = a[30:23];
        end
    end

    // ---- 2. mantissa add / subtract and result sign ------------------------
    always @ (a_man or b_man or a or b) begin
        if (a_man < b_man) begin
            smaller = 1'b1;
        end else begin
            smaller = 1'b0;
        end

        case ({a[31], b[31]})
            2'b00: begin                    // both positive
                sum_man = a_man + b_man;
                sum[31] = 1'b0;
            end
            2'b11: begin                    // both negative
                sum_man = a_man + b_man;
                sum[31] = 1'b1;
            end
            2'b01: begin                    // a positive, b negative
                if (smaller) begin
                    sum_man = b_man - a_man;
                    sum[31] = 1'b1;
                end else begin
                    sum_man = a_man - b_man;
                    sum[31] = 1'b0;
                end
            end
            default: begin                  // a negative, b positive
                if (smaller) begin
                    sum_man = b_man - a_man;
                    sum[31] = 1'b0;
                end else begin
                    sum_man = a_man - b_man;
                    sum[31] = 1'b1;
                end
            end
        endcase
    end

    // ---- 3. normalize and pack (the sign bit is already set above) ---------
    // Priority search for the leading 1 of sum_man.  Bit 24 set means a carry
    // out (shift right by one, exponent + 1); otherwise shift left until the
    // leading 1 sits at bit 23 and reduce the exponent by the same amount.
    // NOTE(review): an all-zero sum_man (exact cancellation) also lands in
    // the last branch, producing exponent a_exp - 23 with a zero fraction.
    always @ (sum_man or a_exp) begin
        if (sum_man[24]) begin
            sum[22:0]  = sum_man[23:1];
            sum[30:23] = a_exp + 8'd1;
        end else if (sum_man[23]) begin
            sum[22:0]  = sum_man[22:0];
            sum[30:23] = a_exp;
        end else if (sum_man[22]) begin
            sum[22:0]  = {sum_man[21:0], 1'b0};
            sum[30:23] = a_exp - 8'd1;
        end else if (sum_man[21]) begin
            sum[22:0]  = {sum_man[20:0], 2'b0};
            sum[30:23] = a_exp - 8'd2;
        end else if (sum_man[20]) begin
            sum[22:0]  = {sum_man[19:0], 3'b0};
            sum[30:23] = a_exp - 8'd3;
        end else if (sum_man[19]) begin
            sum[22:0]  = {sum_man[18:0], 4'b0};
            sum[30:23] = a_exp - 8'd4;
        end else if (sum_man[18]) begin
            sum[22:0]  = {sum_man[17:0], 5'b0};
            sum[30:23] = a_exp - 8'd5;
        end else if (sum_man[17]) begin
            sum[22:0]  = {sum_man[16:0], 6'b0};
            sum[30:23] = a_exp - 8'd6;
        end else if (sum_man[16]) begin
            sum[22:0]  = {sum_man[15:0], 7'b0};
            sum[30:23] = a_exp - 8'd7;
        end else if (sum_man[15]) begin
            sum[22:0]  = {sum_man[14:0], 8'b0};
            sum[30:23] = a_exp - 8'd8;
        end else if (sum_man[14]) begin
            sum[22:0]  = {sum_man[13:0], 9'b0};
            sum[30:23] = a_exp - 8'd9;
        end else if (sum_man[13]) begin
            sum[22:0]  = {sum_man[12:0], 10'b0};
            sum[30:23] = a_exp - 8'd10;
        end else if (sum_man[12]) begin
            sum[22:0]  = {sum_man[11:0], 11'b0};
            sum[30:23] = a_exp - 8'd11;
        end else if (sum_man[11]) begin
            sum[22:0]  = {sum_man[10:0], 12'b0};
            sum[30:23] = a_exp - 8'd12;
        end else if (sum_man[10]) begin
            sum[22:0]  = {sum_man[9:0], 13'b0};
            sum[30:23] = a_exp - 8'd13;
        end else if (sum_man[9]) begin
            sum[22:0]  = {sum_man[8:0], 14'b0};
            sum[30:23] = a_exp - 8'd14;
        end else if (sum_man[8]) begin
            sum[22:0]  = {sum_man[7:0], 15'b0};
            sum[30:23] = a_exp - 8'd15;
        end else if (sum_man[7]) begin
            sum[22:0]  = {sum_man[6:0], 16'b0};
            sum[30:23] = a_exp - 8'd16;
        end else if (sum_man[6]) begin
            sum[22:0]  = {sum_man[5:0], 17'b0};
            sum[30:23] = a_exp - 8'd17;
        end else if (sum_man[5]) begin
            sum[22:0]  = {sum_man[4:0], 18'b0};
            sum[30:23] = a_exp - 8'd18;
        end else if (sum_man[4]) begin
            sum[22:0]  = {sum_man[3:0], 19'b0};
            sum[30:23] = a_exp - 8'd19;
        end else if (sum_man[3]) begin
            sum[22:0]  = {sum_man[2:0], 20'b0};
            sum[30:23] = a_exp - 8'd20;
        end else if (sum_man[2]) begin
            sum[22:0]  = {sum_man[1:0], 21'b0};
            sum[30:23] = a_exp - 8'd21;
        end else if (sum_man[1]) begin
            sum[22:0]  = {sum_man[0], 22'b0};
            sum[30:23] = a_exp - 8'd22;
        end else begin
            // Leading 1 at bit 0 (or no bit set at all): shift left by 23.
            sum[22:0]  = 23'b0;
            sum[30:23] = a_exp - 8'd23;
        end
    end
endmodule
// fpu_div -- simple 32-bit floating-point divider (1 sign / 8 exponent /
// 23 fraction bits, with an implied leading 1 on both operands).
//
// Ports:
//   clock  rising edge registers the unpacked operands (latency of one)
//   n      numerator
//   d      denominator
//   div    quotient; combinational from the registered fields
//
// The visible logic has no special handling for zero, denormal, infinity or
// NaN encodings (including a zero denominator) and does no rounding.
module fpu_div(clock, n, d, div);
    input         clock;
    input  [31:0] n;
    input  [31:0] d;
    output [31:0] div;
    reg    [31:0] div;

    // Registered, unpacked operands.
    reg        n_sign;
    reg        d_sign;
    reg [7:0]  n_exp;
    reg [7:0]  d_exp;
    reg [23:0] n_man;   // implied leading 1 restored
    reg [23:0] d_man;   // implied leading 1 restored

    wire [23:0] div_man;  // raw mantissa quotient from div_24b
    reg  [7:0]  div_exp;  // biased exponent before normalization

    always @ (posedge clock) begin
        n_sign <= n[31];
        d_sign <= d[31];
        n_exp  <= n[30:23];
        d_exp  <= d[30:23];
        n_man  <= {1'b1, n[22:0]};
        d_man  <= {1'b1, d[22:0]};
    end

    // Exponent: bias (127) plus the exponent difference, written so that
    // each subtraction is of the smaller value from the larger one.
    always @ (n_exp or d_exp) begin
        if (n_exp >= d_exp) begin
            div_exp = 8'b01111111 + (n_exp - d_exp);
        end else begin
            div_exp = 8'b01111111 - (d_exp - n_exp);
        end
    end

    // Mantissa quotient.
    div_24b divide(.numer(n_man), .denom(d_man), .res(div_man));

    // Sign of the quotient.
    always @ (n_sign or d_sign) begin
        div[31] = n_sign ^ d_sign;
    end

    // Normalize and pack: priority search for the leading 1 of div_man,
    // shifting it up to bit 23 and reducing the exponent by the same amount.
    always @ (div_man or div_exp) begin
        if (div_man[23]) begin
            div[22:0]  = div_man[22:0];
            div[30:23] = div_exp;
        end else if (div_man[22]) begin
            div[22:0]  = {div_man[21:0], 1'b0};
            div[30:23] = div_exp - 8'd1;
        end else if (div_man[21]) begin
            div[22:0]  = {div_man[20:0], 2'b0};
            div[30:23] = div_exp - 8'd2;
        end else if (div_man[20]) begin
            div[22:0]  = {div_man[19:0], 3'b0};
            div[30:23] = div_exp - 8'd3;
        end else if (div_man[19]) begin
            div[22:0]  = {div_man[18:0], 4'b0};
            div[30:23] = div_exp - 8'd4;
        end else if (div_man[18]) begin
            div[22:0]  = {div_man[17:0], 5'b0};
            div[30:23] = div_exp - 8'd5;
        end else if (div_man[17]) begin
            div[22:0]  = {div_man[16:0], 6'b0};
            div[30:23] = div_exp - 8'd6;
        end else if (div_man[16]) begin
            div[22:0]  = {div_man[15:0], 7'b0};
            div[30:23] = div_exp - 8'd7;
        end else if (div_man[15]) begin
            div[22:0]  = {div_man[14:0], 8'b0};
            div[30:23] = div_exp - 8'd8;
        end else if (div_man[14]) begin
            div[22:0]  = {div_man[13:0], 9'b0};
            div[30:23] = div_exp - 8'd9;
        end else if (div_man[13]) begin
            div[22:0]  = {div_man[12:0], 10'b0};
            div[30:23] = div_exp - 8'd10;
        end else if (div_man[12]) begin
            div[22:0]  = {div_man[11:0], 11'b0};
            div[30:23] = div_exp - 8'd11;
        end else if (div_man[11]) begin
            div[22:0]  = {div_man[10:0], 12'b0};
            div[30:23] = div_exp - 8'd12;
        end else if (div_man[10]) begin
            div[22:0]  = {div_man[9:0], 13'b0};
            div[30:23] = div_exp - 8'd13;
        end else if (div_man[9]) begin
            div[22:0]  = {div_man[8:0], 14'b0};
            div[30:23] = div_exp - 8'd14;
        end else if (div_man[8]) begin
            div[22:0]  = {div_man[7:0], 15'b0};
            div[30:23] = div_exp - 8'd15;
        end else if (div_man[7]) begin
            div[22:0]  = {div_man[6:0], 16'b0};
            div[30:23] = div_exp - 8'd16;
        end else if (div_man[6]) begin
            div[22:0]  = {div_man[5:0], 17'b0};
            div[30:23] = div_exp - 8'd17;
        end else if (div_man[5]) begin
            div[22:0]  = {div_man[4:0], 18'b0};
            div[30:23] = div_exp - 8'd18;
        end else if (div_man[4]) begin
            div[22:0]  = {div_man[3:0], 19'b0};
            div[30:23] = div_exp - 8'd19;
        end else if (div_man[3]) begin
            div[22:0]  = {div_man[2:0], 20'b0};
            div[30:23] = div_exp - 8'd20;
        end else if (div_man[2]) begin
            div[22:0]  = {div_man[1:0], 21'b0};
            div[30:23] = div_exp - 8'd21;
        end else if (div_man[1]) begin
            div[22:0]  = {div_man[0], 22'b0};
            div[30:23] = div_exp - 8'd22;
        end else begin
            // Leading 1 at bit 0 (or no bit set at all): shift left by 23.
            div[22:0]  = 23'b0;
            div[30:23] = div_exp - 8'd23;
        end
    end
endmodule
// div_24b -- purely combinational 24-bit divider built as 24 chained
// compare-and-subtract stages (one quotient bit per stage, MSB first).
//
//   numer  24-bit dividend
//   denom  24-bit divisor
//   res    24-bit quotient of (numer << 23) / denom, truncated
//
// The dividend is placed in the top 24 bits of a 47-bit value (numer23).
// Stage k (k = 23 down to 0) compares the divisor against the bits of the
// running remainder above position k; when the divisor fits, res[k] is set
// and the divisor is subtracted from those bits, otherwise res[k] is cleared
// and the remainder passes through unchanged.  Each stage reads the remainder
// written by the stage before it, so the statement order inside the always
// block is significant.
module div_24b(numer, denom, res);
//input clock;   (no clock: the divider is combinational)
input [23:0]numer;
input [23:0]denom;
output [23:0]res;
reg [23:0]res;
//Divisor zero-extended to 47 bits (23 zeros on top).
wire [46:0]denom_pad;
//Dividend with 23 zeros appended below it.
wire [46:0]numer23;
//numerK is the running remainder that feeds the stage producing res[K].
reg [46:0]numer22;
reg [46:0]numer21;
reg [46:0]numer20;
reg [46:0]numer19;
reg [46:0]numer18;
reg [46:0]numer17;
reg [46:0]numer16;
reg [46:0]numer15;
reg [46:0]numer14;
reg [46:0]numer13;
reg [46:0]numer12;
reg [46:0]numer11;
reg [46:0]numer10;
reg [46:0]numer9;
reg [46:0]numer8;
reg [46:0]numer7;
reg [46:0]numer6;
reg [46:0]numer5;
reg [46:0]numer4;
reg [46:0]numer3;
reg [46:0]numer2;
reg [46:0]numer1;
reg [46:0]numer0;
//always @ (posedge clock) begin   (registering of the operands is disabled)
assign denom_pad = {23'b0, denom};
assign numer23 = {numer, 23'b0};
// end
//All 24 stages live in this one block; blocking assignments carry the
//remainder from one stage to the next.
always @ (denom_pad or numer23) begin
//res[23]
if (denom_pad[23:0] <= numer23[46:23]) begin
res[23] = 1'b1;
numer22 = {numer23[46:23] - denom_pad[23:0], 23'b0};
end else begin
res[23] = 1'b0;
numer22 = numer23;
end
//res[22]
if (denom_pad[24:0] <= numer22[46:22]) begin
res[22] = 1'b1;
numer21 = {numer22[46:22] - denom_pad[24:0], 22'b0};
end else begin
res[22] = 1'b0;
numer21 = numer22;
end
//res[21]
if (denom_pad[25:0] <= numer21[46:21]) begin
res[21] = 1'b1;
numer20 = {numer21[46:21] - denom_pad[25:0], 21'b0};
end else begin
res[21] = 1'b0;
numer20 = numer21;
end
//res[20]
if (denom_pad[26:0] <= numer20[46:20]) begin
res[20] = 1'b1;
numer19 = {numer20[46:20] - denom_pad[26:0], 20'b0};
end else begin
res[20] = 1'b0;
numer19 = numer20;
end
//res[19]
if (denom_pad[27:0] <= numer19[46:19]) begin
res[19] = 1'b1;
numer18 = {numer19[46:19] - denom_pad[27:0], 19'b0};
end else begin
res[19] = 1'b0;
numer18 = numer19;
end
//res[18]
if (denom_pad[28:0] <= numer18[46:18]) begin
res[18] = 1'b1;
numer17 = {numer18[46:18] - denom_pad[28:0], 18'b0};
end else begin
res[18] = 1'b0;
numer17 = numer18;
end
//res[17]
if (denom_pad[29:0] <= numer17[46:17]) begin
res[17] = 1'b1;
numer16 = {numer17[46:17] - denom_pad[29:0], 17'b0};
end else begin
res[17] = 1'b0;
numer16 = numer17;
end
//res[16]
if (denom_pad[30:0] <= numer16[46:16]) begin
res[16] = 1'b1;
numer15 = {numer16[46:16] - denom_pad[30:0], 16'b0};
end else begin
res[16] = 1'b0;
numer15 = numer16;
end
//res[15]
if (denom_pad[31:0] <= numer15[46:15]) begin
res[15] = 1'b1;
numer14 = {numer15[46:15] - denom_pad[31:0], 15'b0};
end else begin
res[15] = 1'b0;
numer14 = numer15;
end
//res[14]
if (denom_pad[32:0] <= numer14[46:14]) begin
res[14] = 1'b1;
numer13 = {numer14[46:14] - denom_pad[32:0], 14'b0};
end else begin
res[14] = 1'b0;
numer13 = numer14;
end
//res[13]
if (denom_pad[33:0] <= numer13[46:13]) begin
res[13] = 1'b1;
numer12 = {numer13[46:13] - denom_pad[33:0], 13'b0};
end else begin
res[13] = 1'b0;
numer12 = numer13;
end
//res[12]
if (denom_pad[34:0] <= numer12[46:12]) begin
res[12] = 1'b1;
numer11 = {numer12[46:12] - denom_pad[34:0], 12'b0};
end else begin
res[12] = 1'b0;
numer11 = numer12;
end
//res[11]
if (denom_pad[35:0] <= numer11[46:11]) begin
res[11] = 1'b1;
numer10 = {numer11[46:11] - denom_pad[35:0], 11'b0};
end else begin
res[11] = 1'b0;
numer10 = numer11;
end
//res[10]
if (denom_pad[36:0] <= numer10[46:10]) begin
res[10] = 1'b1;
numer9 = {numer10[46:10] - denom_pad[36:0], 10'b0};
end else begin
res[10] = 1'b0;
numer9 = numer10;
end
//res[9]
if (denom_pad[37:0] <= numer9[46:9]) begin
res[9] = 1'b1;
numer8 = {numer9[46:9] - denom_pad[37:0], 9'b0};
end else begin
res[9] = 1'b0;
numer8 = numer9;
end
//res[8]
if (denom_pad[38:0] <= numer8[46:8]) begin
res[8] = 1'b1;
numer7 = {numer8[46:8] - denom_pad[38:0], 8'b0};
end else begin
res[8] = 1'b0;
numer7 = numer8;
end
//res[7]
if (denom_pad[39:0] <= numer7[46:7]) begin
res[7] = 1'b1;
numer6 = {numer7[46:7] - denom_pad[39:0], 7'b0};
end else begin
res[7] = 1'b0;
numer6 = numer7;
end
//res[6]
if (denom_pad[40:0] <= numer6[46:6]) begin
res[6] = 1'b1;
numer5 = {numer6[46:6] - denom_pad[40:0], 6'b0};
end else begin
res[6] = 1'b0;
numer5 = numer6;
end
//res[5]
if (denom_pad[41:0] <= numer5[46:5]) begin
res[5] = 1'b1;
numer4 = {numer5[46:5] - denom_pad[41:0], 5'b0};
end else begin
res[5] = 1'b0;
numer4 = numer5;
end
//res[4]
if (denom_pad[42:0] <= numer4[46:4]) begin
res[4] = 1'b1;
numer3 = {numer4[46:4] - denom_pad[42:0], 4'b0};
end else begin
res[4] = 1'b0;
numer3 = numer4;
end
//res[3]
if (denom_pad[43:0] <= numer3[46:3]) begin
res[3] = 1'b1;
numer2 = {numer3[46:3] - denom_pad[43:0], 3'b0};
end else begin
res[3] = 1'b0;
numer2 = numer3;
end
//res[2]
if (denom_pad[44:0] <= numer2[46:2]) begin
res[2] = 1'b1;
numer1 = {numer2[46:2] - denom_pad[44:0], 2'b0};
end else begin
res[2] = 1'b0;
numer1 = numer2;
end
//res[1]
if (denom_pad[45:0] <= numer1[46:1]) begin
res[1] = 1'b1;
numer0 = {numer1[46:1] - denom_pad[45:0], 1'b0};
end else begin
res[1] = 1'b0;
numer0 = numer1;
end
//res[0]: last stage, only the compare is needed (no remainder is kept).
if (denom_pad <= numer0) begin
res[0] = 1'b1;
end else begin
res[0] = 1'b0;
end
end
endmodule
//////////////////////////////////////////////
//
// constants.v
//
// Version 1.3
// Written 7/11/01 David_Harris@hmc.edu & Mark_Phair@hmc.edu
// Modified 8/20/01 Mark_Phair@hmc.edu and Justin_Schauer@hmc.edu
//
// A set of constants for a parameterized floating point multiplier and adder.
//
//////////////////////////////////////////////
//////////////////////////////////////////////
// FREE VARIABLES
//////////////////////////////////////////////
// Widths of Fields
`define WEXP 8
`define WSIG 23
`define WFLAG 5
`define WCONTROL 5
// output flag select (flags[x])
`define DIVZERO 0
`define INVALID 1
`define INEXACT 2
`define OVERFLOW 3
`define UNDERFLOW 4
//////////////////////////////////////////////
// DEPENDENT VARIABLES
// (written out as literal numbers; the formula each one is derived from
//  is given in the trailing comment)
//////////////////////////////////////////////
`define WIDTH 32 // (`WEXP + `WSIG + 1)
`define PRODWIDTH 48 // (2 * (`WSIG + 1))
`define SHIFTWIDTH 96 // (2 * `PRODWIDTH)
`define WPRENORM 24 // `WSIG + 1
`define WEXPSUM 10 // `WEXP + 2
`define BIAS 127 // (2^(`WEXP - 1)) - 1
`define WSIGMINUS1 22 // `WSIG - 1, used for rounding
`define WSHIFTAMT 5 // log2(`WSIG + 1) rounded up
// for trapped over/underflow
`define UNDERBIAS 192 // 3 * 2 ^ (`WEXP - 2)
`define OVERBIAS -192 // -`UNDERBIAS
// specialized constants for fpadd
`define EXTRASIG 25 // `WSIG + 2, the amount of precision needed so that no
// subtraction errors occur
`define SHIFT 5 // # bits the max alignment shift will fit in (log2(`WSIG + 2)
// rounded up to the nearest int)
`define MAX_EXP 8'b11111110 // the maximum non-infinite exponent,
// `WEXP bits, the most significant
// `WEXP-1 bits are 1, the LSB is 0
`define INF_EXP 8'b11111111 // Infinity exponent, `WEXP bits, all 1
// Max significand, `WSIG bits, all 1
`define MAX_SIG 23'b11111111111111111111111
`define WEXP_0 8'b0 // Exponent equals `WEXP'b0
`define WEXP_1 8'b1 // Exponent equals one `WEXP'b1
`define WSIG_0 23'b0 // Significand equals zero `WSIG'b0
`define WSIG_1 23'b1 // Significand equals one `WSIG'b1
`define EXTRASIG_0 25'b0 // All result bits for adder zero `EXTRASIG'b0
// specialized constants for fpmul
`define MAXSHIFT 24 // `WSIG + 1
// GENERAL SPECIAL NUMBERS - Exp + Significand of special numbers
// (each is `WIDTH-1 bits wide: the sign bit is not included)
// plain NaN: exponent all 1, most significant significand bit 1, rest 0
`define CONSTNAN {9'b111111111,22'b0}
// zero: `WIDTH-1 bits, all 0
`define CONSTZERO 31'b0
// infinity: `WEXP bits all 1, `WSIG bits all 0
`define CONSTINFINITY {8'b11111111, 23'b0}
// largest number: maximum exponent (all 1's - 1) and maximum significand (all 1's)
`define CONSTLARGEST {`MAX_EXP, `MAX_SIG}
`define PRESHIFTZEROS 48'b0 // `PRODWIDTH'b0
//////////////////////////////////////////////
//
// fpmul.v
//
// Version 1.6
// Written 07/11/01 David_Harris@hmc.edu & Mark_Phair@hmc.edu
// Modified 08/20/01 Mark_Phair@hmc.edu
//
// A parameterized floating point multiplier.
//
// BLOCK DESCRIPTIONS
//
// preprocess - general processing, such as zero detection, computing sign, NaN
//
// prenorm - normalize denorms
//
// exponent - sum the exponents, check for tininess before rounding
//
// multiply - multiply the mantissae
//
// special - calculate special cases, such as NaN and infinities
//
// shift - shift the sig and exp if necessary
//
// round - round product
//
// normalize - normalizes the result if appropriate (i.e. not a denormalized #)
//
// flag - general flag processing
//
// assemble - assemble results
//
//////////////////////////////////////////////
//////////////////////////////////////////////
// Includes
//////////////////////////////////////////////
//////////////////////////////////////////////
// fpmul module
//////////////////////////////////////////////
// fpmul -- top level of the parameterized floating-point multiplier.
//
// Ports:
//   clk      rising edge registers the result into y_out
//   a, b     `WIDTH-bit floating-point operands
//   y_out    `WIDTH-bit product, registered (one cycle after a/b)
//   control  2-bit field; preprocess forwards it unchanged as the rounding
//            mode (00 = RN, 01 = RZ, 10 = RP, 11 = RM)
//   flags    `WFLAG status bits, indexed by `DIVZERO, `INVALID, `INEXACT,
//            `OVERFLOW and `UNDERFLOW
//
// Everything between the inputs and the y_out register is a chain of
// sub-blocks connected by position:
//   preprocess -> special / prenorm -> multiply_a -> exponent -> normalize
//   -> shift -> round -> flag -> assemble
module fpmul(clk, a, b, y_out, control, flags) ;
    input clk;
    // external signals
    input  [`WIDTH-1:0] a, b;     // floating-point inputs
    output [`WIDTH-1:0] y_out;    // floating-point product
    reg    [`WIDTH-1:0] y_out;
    input  [1:0]        control;  // control including rounding mode
    output [`WFLAG-1:0] flags;    // DIVZERO, INVALID, INEXACT,
                                  // OVERFLOW, UNDERFLOW (defined in constant.v)

    // intermediate y_out (combinational result before the output register)
    wire [`WIDTH-1:0] y;

    // internal signals
    wire multsign;                     // sign of product (declared but not connected to anything)
    wire sign;                         // sign of product as computed by preprocess;
                                       // declared explicitly instead of relying on an implicit net
    wire specialsign;                  // sign of special
    wire [`WSIG:0] norma;              // normal-form mantissa a, 1 bit larger to hold leading 1
    wire [`WSIG:0] normb;              // normal-form mantissa b, 1 bit larger to hold leading 1
    wire [`WEXPSUM-1:0] expa, expb;    // the two exponents, after prenormalization
    wire [`WEXPSUM-1:0] expsum;        // sum of exponents (two's complement)
    wire [`WEXPSUM-1:0] shiftexp;      // shifted exponent
    wire [`WEXP-1:0] roundexp;         // rounded, correct exponent
    wire [`PRODWIDTH-1:0] prod;        // product of mantissae
    wire [`PRODWIDTH-1:0] normalized;  // Normalized product
    wire [`SHIFTWIDTH-1:0] shiftprod;  // shifted product
    wire [`WSIG-1:0] roundprod;        // rounded product
    wire [`WIDTH-2:0] special;         // special case exponent and product
    wire twoormore;                    // product is outside range [1,2)
    wire zero;                         // zero detected
    wire infinity;                     // infinity detected
    wire aisnan;                       // NaN detected in A
    wire bisnan;                       // NaN detected in B
    wire aisdenorm;                    // Denormalized number detected in A
    wire bisdenorm;                    // Denormalized number detected in B
    wire specialcase;                  // This is a special case
    wire specialsigncase;              // Use the special case sign
    wire roundoverflow;                // overflow in rounding, need to add 1 to exponent
    wire invalid;                      // invalid operation
    wire overflow;                     // exponent result too high, standard overflow
    wire inexact;                      // inexact flag
    wire shiftloss;                    // lost digits due to a shift, result inaccurate
    wire [1:0] roundmode;              // rounding mode information extracted from control field
    wire tiny;                         // Result is tiny (denormalized #) after multiplication
    wire stilltiny;                    // Result is tiny (denormalized #) after rounding
    wire denormround;                  // rounding occurred only because the initial result was
                                       // a denormalized number. This is used to determine
                                       // underflow in cases of denormalized numbers rounding
                                       // up to normalized numbers

    preprocess preprocesser(a, b, zero, aisnan, bisnan,
                            aisdenorm, bisdenorm, infinity,
                            control, roundmode, sign);

    special specialer(a, b, special, specialsign, zero,
                      aisnan, bisnan,
                      infinity, invalid,
                      specialcase, specialsigncase);

    prenorm prenormer(a[`WIDTH-2:0], b[`WIDTH-2:0], norma, normb, expa, expb, aisdenorm, bisdenorm);

    multiply_a multiplier(norma, normb, prod, twoormore);

    exponent exponenter(expa, expb, expsum, twoormore, tiny);

    normalize normalizer(prod, normalized, tiny, twoormore);

    shift shifter(normalized, expsum, shiftprod,
                  shiftexp, shiftloss);

    round rounder(shiftprod, shiftexp, shiftloss,
                  roundprod, roundexp,
                  roundmode, sign, tiny, inexact,
                  overflow, stilltiny, denormround);

    // *** To check for tininess before rounding, use tiny
    //     To check after rounding, use stilltiny
    // *** for underflow detect:
    //     To check for inexact result use (inexact | (shiftloss & stilltiny)),
    //     To check for denormalization loss use (shiftloss & stilltiny)
    // flag flager(invalid, overflow, inexact | shiftloss,
    //             shiftloss | inexact,
    //             /* tiny */ (stilltiny | (tiny & denormround)),
    //             specialcase, flags);

    // ODIN cannot have operations in module instantiations, so the three
    // expressions above are computed on named wires first.
    wire inexact_or_shiftloss;
    assign inexact_or_shiftloss = inexact | shiftloss;
    wire shiftloss_or_inexact;
    assign shiftloss_or_inexact = shiftloss | inexact;
    wire still_tiny_or_tiny_and_denormround;
    assign still_tiny_or_tiny_and_denormround = stilltiny | (tiny & denormround);

    // The fifth argument must be the wire driven just above.  It was
    // previously spelled "stilltiny_or_tiny_and_denormround", which created
    // a separate, undriven implicit net and left this input floating.
    flag flager(invalid, overflow, inexact_or_shiftloss,
                shiftloss_or_inexact,
                /* tiny */ still_tiny_or_tiny_and_denormround,
                specialcase, flags);

    assemble assembler(roundprod, special, y,
                       sign, specialsign, roundexp,
                       specialcase, specialsigncase,
                       roundmode, flags[`OVERFLOW]);

    // Output register.
    always @ (posedge clk) begin
        y_out <= y;
    end
endmodule
// preprocess -- classifies the two multiplier operands and derives the
// product sign.  Purely combinational.
//
//   a, b        `WIDTH-bit floating-point operands
//   control     2-bit control field, forwarded unchanged as roundmode
//   roundmode   00 = RN; 01 = RZ; 10 = RP; 11 = RM
//   sign        XOR of the two operand sign bits
//   zero        at least one operand has exponent and significand all 0
//   infinity    at least one operand has exponent all 1, significand all 0
//   aisnan      a has exponent all 1 and a non-zero significand
//   bisnan      same test on b
//   aisdenorm   a has exponent all 0 and a non-zero significand
//   bisdenorm   same test on b
module preprocess(a, b, zero, aisnan, bisnan, aisdenorm, bisdenorm, infinity, control, roundmode, sign);
    // external signals
    input  [`WIDTH-1:0] a, b;
    //input [`WCONTROL-1:0] control;  // full-width control field
    input  [1:0] control;             // only these bits are needed; the rest is left out for ODIN
    output [1:0] roundmode;
    output sign;
    output zero;
    output infinity;
    output aisnan;
    output bisnan;
    output aisdenorm;
    output bisdenorm;

    // ---- field extraction ------------------------------------------------
    wire             signa, signb;   // sign bits
    wire [`WEXP-1:0] expa,  expb;    // exponent fields
    wire [`WSIG-1:0] siga,  sigb;    // significand fields

    assign signa = a[`WIDTH-1];
    assign expa  = a[`WIDTH-2:`WIDTH-`WEXP-1];
    assign siga  = a[`WSIG-1:0];

    assign signb = b[`WIDTH-1];
    assign expb  = b[`WIDTH-2:`WIDTH-`WEXP-1];
    assign sigb  = b[`WSIG-1:0];

    // ---- per-operand field tests -------------------------------------------
    wire aexpzero, aexpfull, asigzero;   // a: exponent all 0 / exponent all 1 / significand all 0
    wire bexpzero, bexpfull, bsigzero;   // b: same three tests

    assign aexpzero = ~|expa;
    assign aexpfull = &expa;
    assign asigzero = ~|siga;

    assign bexpzero = ~|expb;
    assign bexpfull = &expb;
    assign bsigzero = ~|sigb;

    // ---- outputs -----------------------------------------------------------
    assign sign      = signa ^ signb;
    assign roundmode = control[1:0];

    // Either-operand classifications.
    assign zero      = (aexpzero & asigzero) | (bexpzero & bsigzero);
    assign infinity  = (aexpfull & asigzero) | (bexpfull & bsigzero);

    // Per-operand classifications.
    assign aisnan    = aexpfull & ~asigzero;
    assign bisnan    = bexpfull & ~bsigzero;
    assign aisdenorm = aexpzero & ~asigzero;
    assign bisdenorm = bexpzero & ~bsigzero;
endmodule
// special: produces the result (exponent + significand, and sign) used when
// the multiplication is a special case: a NaN operand, a zero operand or an
// infinite operand. Also raises `invalid` for infinity * zero.
module special (
    input  [`WIDTH-1:0] a,                // floating-point operand A
    input  [`WIDTH-1:0] b,                // floating-point operand B
    output [`WIDTH-2:0] special,          // special-case result, exp + sig
    output              specialsign,      // sign to use for the special case
    input               zero,             // an operand is zero
    input               aisnan,           // A is a NaN
    input               bisnan,           // B is a NaN
    input               infinity,         // an operand is an infinity
    output              invalid,          // invalid operation
    output              specialcase,      // result comes from this module
    output              specialsigncase   // sign comes from this module
);
    wire              inf_times_zero;  // both an infinity and a zero are present
    wire              pick_a_nan;      // A supplies the propagated NaN
    wire [`WIDTH-2:0] nan_payload;     // propagated NaN; don't care if no NaN input
    wire [`WIDTH-2:0] non_nan_result;  // result when neither operand is a NaN

    assign inf_times_zero = infinity & zero;

    // "Return the higher NaN" policy: A is chosen when it is a NaN and either
    // B is not a NaN or A's significand field is >= B's.
    assign pick_a_nan  = aisnan & (~bisnan | (a[`WSIG-1:0] >= b[`WSIG-1:0]));
    assign nan_payload = pick_a_nan ? a[`WIDTH-2:0] : b[`WIDTH-2:0];

    // Without a NaN input:
    //   zero present and infinity present -> constant NaN
    //   zero present only                 -> constant zero
    //   otherwise                         -> constant infinity
    assign non_nan_result = zero ? (infinity ? (`CONSTNAN) : (`CONSTZERO))
                                 : (`CONSTINFINITY);

    assign special[`WIDTH-2:0] = (aisnan | bisnan) ? nan_payload : non_nan_result;

    // Alternative policy kept from the original source for reference
    // ("return the first NaN encountered"):
    // assign special = aisnan ? (a[`WIDTH-2:0]) :
    //                  (bisnan ? (b[`WIDTH-2:0]) :
    //                  (zero ?
    //                  (infinity ? (`CONSTNAN) : (`CONSTZERO)) : (`CONSTINFINITY)));

    assign specialcase = zero | aisnan | bisnan | infinity;

    // *** signaling NaNs are not taken into account here
    assign invalid = inf_times_zero;

    // B's sign is the fall-through choice; when B is not a NaN and that branch
    // is reached, specialsigncase is low and the value is not used.
    assign specialsign     = inf_times_zero ? 1'b1
                                            : (pick_a_nan ? a[`WIDTH-1] : b[`WIDTH-1]);
    assign specialsigncase = inf_times_zero | aisnan | bisnan;
endmodule
// prenorm: brings both significands into normal form before multiplication.
// For a normal operand the implied leading 1 is prepended; for a denormal the
// significand is shifted left by the position of its leading 1 and the
// exponent is replaced by (1 - shift amount).
module prenorm(
    input  [`WIDTH-2:0]   a,          // operand A without its sign bit (unused here, ODIN)
    input  [`WIDTH-2:0]   b,          // operand B without its sign bit
    output [`WSIG:0]      norma,      // significand of A in normal form
    output [`WSIG:0]      normb,      // significand of B in normal form
    output [`WEXPSUM-1:0] modexpa,    // adjusted exponent of A (wider, two's complement)
    output [`WEXPSUM-1:0] modexpb,    // adjusted exponent of B (wider, two's complement)
    input                 aisdenorm,  // A is a denormalized number
    input                 bisdenorm   // B is a denormalized number
);
    wire [`WEXPSUM-1:0] exp_a, exp_b;      // raw exponent fields, widened
    wire [`SHIFT-1:0]   lshift_a, lshift_b; // left-shift amounts for denormals
    reg  [`WSIG:0]      sig_a_shl, sig_b_shl; // shifted significands (reg for ODIN)

    assign exp_a = a[`WIDTH-2:`WIDTH-1-`WEXP];
    assign exp_b = b[`WIDTH-2:`WIDTH-1-`WEXP];

    // Priority scan for the leading 1 of the significand field
    // (originally generated by ./prenormshift.pl). Bit 0 is never tested:
    // when bits 22..1 are all clear the shift amount is 23.
    assign lshift_a = a[22] ?  1 : a[21] ?  2 : a[20] ?  3 : a[19] ?  4 :
                      a[18] ?  5 : a[17] ?  6 : a[16] ?  7 : a[15] ?  8 :
                      a[14] ?  9 : a[13] ? 10 : a[12] ? 11 : a[11] ? 12 :
                      a[10] ? 13 : a[9]  ? 14 : a[8]  ? 15 : a[7]  ? 16 :
                      a[6]  ? 17 : a[5]  ? 18 : a[4]  ? 19 : a[3]  ? 20 :
                      a[2]  ? 21 : a[1]  ? 22 : 23;

    assign lshift_b = b[22] ?  1 : b[21] ?  2 : b[20] ?  3 : b[19] ?  4 :
                      b[18] ?  5 : b[17] ?  6 : b[16] ?  7 : b[15] ?  8 :
                      b[14] ?  9 : b[13] ? 10 : b[12] ? 11 : b[11] ? 12 :
                      b[10] ? 13 : b[9]  ? 14 : b[8]  ? 15 : b[7]  ? 16 :
                      b[6]  ? 17 : b[5]  ? 18 : b[4]  ? 19 : b[3]  ? 20 :
                      b[2]  ? 21 : b[1]  ? 22 : 23;

    // A denormal's exponent is decremented by its shift amount.
    assign modexpa = aisdenorm ? (1 - lshift_a) : exp_a;
    assign modexpb = bisdenorm ? (1 - lshift_b) : exp_b;

    // Equivalent to "a[`WSIG-1:0] << lshift_a", spelled out arm by arm
    // because ODIN only accepts constant shift amounts.
    always @ (lshift_a or a) begin
        case (lshift_a)
            5'd1:    sig_a_shl = a[`WSIG-1:0] << 5'd1;
            5'd2:    sig_a_shl = a[`WSIG-1:0] << 5'd2;
            5'd3:    sig_a_shl = a[`WSIG-1:0] << 5'd3;
            5'd4:    sig_a_shl = a[`WSIG-1:0] << 5'd4;
            5'd5:    sig_a_shl = a[`WSIG-1:0] << 5'd5;
            5'd6:    sig_a_shl = a[`WSIG-1:0] << 5'd6;
            5'd7:    sig_a_shl = a[`WSIG-1:0] << 5'd7;
            5'd8:    sig_a_shl = a[`WSIG-1:0] << 5'd8;
            5'd9:    sig_a_shl = a[`WSIG-1:0] << 5'd9;
            5'd10:   sig_a_shl = a[`WSIG-1:0] << 5'd10;
            5'd11:   sig_a_shl = a[`WSIG-1:0] << 5'd11;
            5'd12:   sig_a_shl = a[`WSIG-1:0] << 5'd12;
            5'd13:   sig_a_shl = a[`WSIG-1:0] << 5'd13;
            5'd14:   sig_a_shl = a[`WSIG-1:0] << 5'd14;
            5'd15:   sig_a_shl = a[`WSIG-1:0] << 5'd15;
            5'd16:   sig_a_shl = a[`WSIG-1:0] << 5'd16;
            5'd17:   sig_a_shl = a[`WSIG-1:0] << 5'd17;
            5'd18:   sig_a_shl = a[`WSIG-1:0] << 5'd18;
            5'd19:   sig_a_shl = a[`WSIG-1:0] << 5'd19;
            5'd20:   sig_a_shl = a[`WSIG-1:0] << 5'd20;
            5'd21:   sig_a_shl = a[`WSIG-1:0] << 5'd21;
            5'd22:   sig_a_shl = a[`WSIG-1:0] << 5'd22;
            5'd23:   sig_a_shl = a[`WSIG-1:0] << 5'd23;
            default: sig_a_shl = a[`WSIG-1:0];   // shift amount never exceeds 23
        endcase
    end

    // Same constant-shift selection for operand B.
    always @ (lshift_b or b) begin
        case (lshift_b)
            5'd1:    sig_b_shl = b[`WSIG-1:0] << 5'd1;
            5'd2:    sig_b_shl = b[`WSIG-1:0] << 5'd2;
            5'd3:    sig_b_shl = b[`WSIG-1:0] << 5'd3;
            5'd4:    sig_b_shl = b[`WSIG-1:0] << 5'd4;
            5'd5:    sig_b_shl = b[`WSIG-1:0] << 5'd5;
            5'd6:    sig_b_shl = b[`WSIG-1:0] << 5'd6;
            5'd7:    sig_b_shl = b[`WSIG-1:0] << 5'd7;
            5'd8:    sig_b_shl = b[`WSIG-1:0] << 5'd8;
            5'd9:    sig_b_shl = b[`WSIG-1:0] << 5'd9;
            5'd10:   sig_b_shl = b[`WSIG-1:0] << 5'd10;
            5'd11:   sig_b_shl = b[`WSIG-1:0] << 5'd11;
            5'd12:   sig_b_shl = b[`WSIG-1:0] << 5'd12;
            5'd13:   sig_b_shl = b[`WSIG-1:0] << 5'd13;
            5'd14:   sig_b_shl = b[`WSIG-1:0] << 5'd14;
            5'd15:   sig_b_shl = b[`WSIG-1:0] << 5'd15;
            5'd16:   sig_b_shl = b[`WSIG-1:0] << 5'd16;
            5'd17:   sig_b_shl = b[`WSIG-1:0] << 5'd17;
            5'd18:   sig_b_shl = b[`WSIG-1:0] << 5'd18;
            5'd19:   sig_b_shl = b[`WSIG-1:0] << 5'd19;
            5'd20:   sig_b_shl = b[`WSIG-1:0] << 5'd20;
            5'd21:   sig_b_shl = b[`WSIG-1:0] << 5'd21;
            5'd22:   sig_b_shl = b[`WSIG-1:0] << 5'd22;
            5'd23:   sig_b_shl = b[`WSIG-1:0] << 5'd23;
            default: sig_b_shl = b[`WSIG-1:0];   // shift amount never exceeds 23
        endcase
    end

    // Denormal: use the shifted significand. Normal: prepend the implied 1.
    assign norma = aisdenorm ? sig_a_shl : {1'b1, a[`WSIG-1:0]};
    assign normb = bisdenorm ? sig_b_shl : {1'b1, b[`WSIG-1:0]};
endmodule
// multiply_a: multiplies the two normalized significands and reports whether
// the product left the range [1,2).
module multiply_a (
    input  [`WSIG:0]        norma,     // normalized significand of A
    input  [`WSIG:0]        normb,     // normalized significand of B
    output [`PRODWIDTH-1:0] prod,      // product of the significands
    output                  twoormore  // product overflowed the range [1,2)
);
    // Plain behavioural multiply
    // (*** a more efficient multiplier could be substituted here).
    assign prod = norma * normb;

    // The top product bit is set exactly when the product is outside [1,2).
    assign twoormore = prod[`PRODWIDTH-1];
endmodule
// exponent: adds the two pre-normalized exponents, removes the bias, and
// flags results whose exponent is below 1 as tiny.
module exponent(
    input  [`WEXPSUM-1:0] expa,       // exponent of A, two's complement
    input  [`WEXPSUM-1:0] expb,       // exponent of B, two's complement
    output [`WEXPSUM-1:0] expsum,     // biased sum of the exponents
    input                 twoormore,  // significand product is outside [1,2)
    output                tiny        // result is tiny (denormalized)
);
    // expa + expb - bias, plus one more when the significand product
    // overflowed the range [1,2).
    assign expsum = expa + expb - `BIAS + twoormore;

    // expsum is two's complement: it is below 1 when the top (sign) bit is set
    // or when all the bits beneath the sign bit are zero.
    assign tiny = expsum[`WEXPSUM-1] | ~(|expsum[`WEXPSUM-2:0]);
endmodule
// normalize: left-aligns the significand product depending on whether the
// result is tiny and whether the product overflowed the range [1,2).
module normalize(
    input  [`PRODWIDTH-1:0] prod,        // product of the significands
    output [`PRODWIDTH-1:0] normalized,  // normalized product
    input                   tiny,        // result is tiny (denormalized)
    input                   twoormore    // product overflowed the range [1,2)
);
    wire [`PRODWIDTH-1:0] keep_all;   // whole product, leading 1 included
    wire [`PRODWIDTH-1:0] drop_one;   // top bit dropped
    wire [`PRODWIDTH-1:0] drop_two;   // top two bits dropped

    assign keep_all = prod[`PRODWIDTH-1:0];
    assign drop_one = {prod[`PRODWIDTH-2:0], 1'b0};
    assign drop_two = {prod[`PRODWIDTH-3:0], 2'b0};

    // Three cases:
    //   tiny and overflow     -> keep the whole product
    //   exactly one of them   -> drop the first bit (it is zero when tiny and
    //                            the implied 1 otherwise)
    //   neither               -> drop the first two bits (the second one is
    //                            the implied 1)
    assign normalized = (tiny & twoormore) ? keep_all :
                        ((tiny ^ twoormore) ? drop_one : drop_two);
endmodule
// shift: denormalizes the product when the selected exponent is not positive.
// The product is shifted right by the negated exponent (capped at `MAXSHIFT),
// the exponent is forced to 0, and any bits lost in the shift are reported.
module shift(
    input  [`PRODWIDTH-1:0]  normalized,   // normalized product of the significands
    input  [`WEXPSUM-1:0]    selectedexp,  // sum of the exponents
    output [`SHIFTWIDTH-1:0] shiftprod,    // shifted and normalized product
    output [`WEXPSUM-1:0]    shiftexp,     // exponent after shifting
    output                   shiftloss     // accuracy was lost by shifting
);
    wire [`WEXPSUM-1:0]    roundedexp;  // declared in the original; not used in this module
    wire [`WEXPSUM-1:0]    want_shift;  // theoretical right-shift amount
    wire [`WSHIFTAMT-1:0]  real_shift;  // shift amount actually applied
    wire                   flush;       // more shifts needed than the significand can show
    wire                   doshift;     // shift only when the amount is non-negative
    wire [`SHIFTWIDTH-1:0] wide_in;     // product padded with low-order zeros
    reg  [`SHIFTWIDTH-1:0] wide_out;    // padded product after shifting (reg for ODIN)

    // Pad the product so the right shift has room below it.
    assign wide_in = {normalized, `PRESHIFTZEROS};

    // The shift amount is the negated exponent; a set top bit means the
    // amount is negative, in which case no shift is performed.
    assign want_shift = -selectedexp;
    assign doshift    = ~want_shift[`WEXPSUM-1];

    // Beyond `MAXSHIFT the exact amount no longer matters: clamp it.
    assign flush      = doshift & (want_shift > `MAXSHIFT);
    assign real_shift = flush ? `MAXSHIFT : want_shift[`WSHIFTAMT-1:0];

    // Equivalent to "wide_in >> real_shift", spelled out arm by arm because
    // ODIN only accepts constant shift amounts.
    always @ (real_shift or wide_in) begin
        case (real_shift)
            5'd1:    wide_out = wide_in >> 5'd1;
            5'd2:    wide_out = wide_in >> 5'd2;
            5'd3:    wide_out = wide_in >> 5'd3;
            5'd4:    wide_out = wide_in >> 5'd4;
            5'd5:    wide_out = wide_in >> 5'd5;
            5'd6:    wide_out = wide_in >> 5'd6;
            5'd7:    wide_out = wide_in >> 5'd7;
            5'd8:    wide_out = wide_in >> 5'd8;
            5'd9:    wide_out = wide_in >> 5'd9;
            5'd10:   wide_out = wide_in >> 5'd10;
            5'd11:   wide_out = wide_in >> 5'd11;
            5'd12:   wide_out = wide_in >> 5'd12;
            5'd13:   wide_out = wide_in >> 5'd13;
            5'd14:   wide_out = wide_in >> 5'd14;
            5'd15:   wide_out = wide_in >> 5'd15;
            5'd16:   wide_out = wide_in >> 5'd16;
            5'd17:   wide_out = wide_in >> 5'd17;
            5'd18:   wide_out = wide_in >> 5'd18;
            5'd19:   wide_out = wide_in >> 5'd19;
            5'd20:   wide_out = wide_in >> 5'd20;
            5'd21:   wide_out = wide_in >> 5'd21;
            5'd22:   wide_out = wide_in >> 5'd22;
            5'd23:   wide_out = wide_in >> 5'd23;
            5'd24:   wide_out = wide_in >> 5'd24;
            5'd25:   wide_out = wide_in >> 5'd25;
            5'd26:   wide_out = wide_in >> 5'd26;
            5'd27:   wide_out = wide_in >> 5'd27;
            5'd28:   wide_out = wide_in >> 5'd28;
            5'd29:   wide_out = wide_in >> 5'd29;
            5'd30:   wide_out = wide_in >> 5'd30;
            5'd31:   wide_out = wide_in >> 5'd31;
            default: wide_out = wide_in;   // shift amount of zero
        endcase
    end

    // Use the shifted value only when a shift is actually required.
    assign shiftprod = doshift ? wide_out : wide_in;

    // Bits were lost if the shift was clamped, or if anything landed in the
    // low-order padding region of the shifted value.
    assign shiftloss = flush | (doshift & (|wide_out[`SHIFTWIDTH-`PRODWIDTH-1:0]));

    // A shifted (denormal) result carries a zero exponent.
    assign shiftexp = doshift ? 0 : selectedexp;
endmodule
// round: rounds the shifted product to `WSIG bits according to roundmode,
// post-normalizes on rounding overflow, and derives the inexact / overflow /
// still-tiny indications.
module round(
    input  [`SHIFTWIDTH-1:0] shiftprod,    // normalized and shifted product
    input  [`WEXPSUM-1:0]    shiftexp,     // shifted exponent
    input                    shiftloss,    // bits were lost while shifting
    output [`WSIG-1:0]       roundprod,    // rounded significand
    output [`WEXP-1:0]       roundexp,     // rounded exponent
    input  [1:0]             roundmode,    // 00 = RN; 01 = RZ; 10 = RP; 11 = RM
    input                    sign,         // sign bit, gives rounding direction
    input                    tiny,         // result is denormalized
    output                   inexact,      // rounding occurred
    output                   overflow,     // overflow occurred
    output                   stilltiny,    // result is still tiny after rounding
    output                   denormround   // rounded only because it was a denormal
);
    wire                to_zero;       // effective mode: round towards zero
    wire                to_inf;        // effective mode: round away from zero
    wire [`WSIG-1:0]    top_bits;      // the `WSIG most significant product bits
    wire [`WSIG:0]      top_bits_inc;  // top_bits + 1, one bit wider for the carry
    wire                lsb_bit;       // last kept bit
    wire                guard_bit;     // first discarded bit
    wire                sticky;        // any 1 below the guard bit, or shift loss
    wire                sticky_dn;     // same, ignoring the bit just below the guard
    wire                up_from_guard; // round-up term driven by the guard bit
    wire                up_from_sticky;// round-up term driven by the sticky bit
    wire                round_up;      // final decision: use top_bits + 1
    wire                carry_out;     // rounding overflowed the significand
    wire [`WEXPSUM-1:0] exp_adj;       // exponent after rounding

    // Collapse the four modes into two flags; round-to-nearest is implied
    // when neither flag is set. The directed modes depend on the sign.
    assign to_zero = (~roundmode[1] & roundmode[0]) | (roundmode[1] & (roundmode[0] ^ sign));
    assign to_inf  = roundmode[1] & ~(sign ^ roundmode[0]);

    // Kept bits, and the same value incremented for the round-up case.
    assign top_bits     = shiftprod[`SHIFTWIDTH-1:`SHIFTWIDTH-`WSIG];
    assign top_bits_inc = top_bits + 1;

    // Last kept bit and first discarded bit steer the rounding decision.
    assign lsb_bit   = shiftprod[`SHIFTWIDTH-`WSIG];
    assign guard_bit = shiftprod[`SHIFTWIDTH-`WSIG-1];

    // Sticky information: any remaining low-order 1, or loss during shifting.
    // *** some of these bits are already covered by shiftloss, and sticky
    //     could be derived from sticky_dn with a single extra gate
    assign sticky    = (|shiftprod[`SHIFTWIDTH-`WSIG-2:0]) | shiftloss;
    assign sticky_dn = (|shiftprod[`SHIFTWIDTH-`WSIG-3:0]) | shiftloss;

    // Rounding decision.
    assign up_from_guard  = guard_bit & (to_inf | lsb_bit);
    assign up_from_sticky = sticky    & (to_inf | guard_bit);
    assign round_up       = ~to_zero & (up_from_guard | up_from_sticky);

    // Was the result rounded only because it is a denormal?
    assign denormround = tiny & round_up & ~sticky_dn & guard_bit;

    // Rounding overflows only when rounding up AND the increment carried out.
    assign carry_out = top_bits_inc[`WSIG] & round_up;

    // Significand selection with post-normalization: on a rounding overflow
    // the significand field becomes all zeros.
    assign roundprod = round_up ?
                           (carry_out ? 0 : top_bits_inc[`WSIG-1:0]) :
                           top_bits;

    assign inexact = round_up | sticky | guard_bit;

    // A rounding overflow bumps the exponent by one.
    assign exp_adj = shiftexp + carry_out;

    // Overflow: the exponent does not fit in `WEXP bits, or its low `WEXP
    // bits are all 1's.
    assign overflow = (&exp_adj[`WEXP-1:0]) | (|exp_adj[`WEXPSUM-1:`WEXP]);

    // On overflow the exponent is replaced downstream; otherwise the bits
    // above `WEXP are zero, so the truncation is lossless.
    assign roundexp = exp_adj[`WEXP-1:0];

    // The exponent is not two's complement at this point, so it is below 1
    // only when every bit is zero.
    assign stilltiny = ~(|exp_adj[`WEXP-1:0]);
endmodule
// flag: packs the individual exception conditions into the flags vector.
// Bit positions come from the `DIVZERO / `INVALID / `INEXACT / `OVERFLOW /
// `UNDERFLOW macros.
module flag (
    input                invalid,      // invalid operation
    input                overflow,     // result was too large
    input                inexact,      // result was rounded
    input                underflow,    // underflow detected
    input                tiny,         // result is tiny
    input                specialcase,  // special result in use: suppresses flags
    output [`WFLAG-1:0]  flags         // packed exception flags
);
    wire normal_result;   // no special case, so flags may be raised

    assign normal_result = ~specialcase;

    // A multiplier never divides by zero.
    assign flags[`DIVZERO]   = 1'b0;
    assign flags[`INVALID]   = invalid;
    assign flags[`OVERFLOW]  = normal_result & overflow;
    assign flags[`INEXACT]   = normal_result & (inexact | underflow | overflow);
    // Driven by tiny alone; the fuller expression
    // (~specialcase & tiny & underflow & ~overflow) was left disabled.
    assign flags[`UNDERFLOW] = tiny;
endmodule
// assemble: builds the final floating-point word from the rounded result,
// the special-case result, or the overflow substitute value.
module assemble(
    input  [`WSIG-1:0]  roundprod,        // shifted, rounded, normalized significand
    input  [`WIDTH-2:0] special,          // special-case exponent + significand
    output [`WIDTH-1:0] y,                // floating-point product
    input               sign,             // sign of the product (+ = 0, - = 1)
    input               specialsign,      // sign for the special case
    input  [`WEXP-1:0]  shiftexp,         // exponent of the result
    input               specialcase,      // use the special-case value
    input               specialsigncase,  // use the special-case sign
    input  [1:0]        roundmode,        // rounding mode from the control field
    input               overflow          // overflow detected
);
    wire [`WIDTH-2:0] packed_normal;    // exponent + significand, normal path
    wire [`WIDTH-2:0] packed_overflow;  // exponent + significand on overflow
    wire              saturate;         // overflow yields largest finite value
    wire              undenormed;       // declared in the original; not used in this module

    // Normal path: significand in the low bits, exponent directly above it.
    assign packed_normal[`WSIG-1:0]              = roundprod;
    assign packed_normal[`WIDTH-2:`WIDTH-`WEXP-1] = shiftexp;

    // Overflow path: the rounding mode (and, for the directed modes, the
    // sign) selects between the largest finite value and infinity.
    assign saturate = roundmode[1] ? (sign ^ roundmode[0]) : roundmode[0];
    assign packed_overflow[`WIDTH-2:0] = saturate ? `CONSTLARGEST : `CONSTINFINITY;

    // Sign bit.
    assign y[`WIDTH-1] = specialsigncase ? specialsign : sign;

    // Magnitude: special case first, then overflow, then the rounded value.
    assign y[`WIDTH-2:0] = specialcase ? special[`WIDTH-2:0] :
                           (overflow ? packed_overflow[`WIDTH-2:0] :
                                       packed_normal[`WIDTH-2:0]);
endmodule
//---------------------------------------
// A dual-port RAM
// This module is tuned for VTR's benchmarks
//---------------------------------------
module dual_port_ram (
  input clk,
  input we1,
  input we2,
  input [`rRAMSIZEWIDTH - 1 : 0] addr1,
  input [`RAMWIDTH - 1 : 0] data1,
  output [`RAMWIDTH - 1 : 0] out1,
  input [`rRAMSIZEWIDTH - 1 : 0] addr2,
  input [`RAMWIDTH - 1 : 0] data2,
  output [`RAMWIDTH - 1 : 0] out2
);
  // Storage: 2**`rRAMSIZEWIDTH words, each `RAMWIDTH bits wide.
  reg [`RAMWIDTH - 1 : 0] ram[2**`rRAMSIZEWIDTH - 1 : 0];

  // Registered read data, one register per port.
  reg [`RAMWIDTH - 1 : 0] rd_q1;
  reg [`RAMWIDTH - 1 : 0] rd_q2;

  // Port 1: on a clock edge either store data1 at addr1 (we1 high) or
  // capture the word at addr1 into the output register (we1 low).
  // The output register keeps its previous value during a write.
  always @(posedge clk)
    if (we1) ram[addr1] <= data1;
    else     rd_q1 <= ram[addr1];

  // Port 2: same behaviour, using we2 / addr2 / data2.
  // No arbitration between the two ports is implemented here.
  always @(posedge clk)
    if (we2) ram[addr2] <= data2;
    else     rd_q2 <= ram[addr2];

  assign out1 = rd_q1;
  assign out2 = rd_q2;
endmodule
//---------------------------------------
// A dual-port RAM 4096x32
// This module is tuned for VTR's benchmarks
//---------------------------------------
module dual_port_ram_4096x32 (
  input clk,
  input we1,
  input we2,
  input [12 - 1 : 0] addr1,
  input [32 - 1 : 0] data1,
  output [32 - 1 : 0] out1,
  input [12 - 1 : 0] addr2,
  input [32 - 1 : 0] data2,
  output [32 - 1 : 0] out2
);
  // Storage: 2**12 = 4096 words, each 32 bits wide.
  reg [32 - 1 : 0] ram[2**12 - 1 : 0];

  // Registered read data, one register per port.
  reg [32 - 1 : 0] rd_q1;
  reg [32 - 1 : 0] rd_q2;

  // Port 1: on a clock edge either store data1 at addr1 (we1 high) or
  // capture the word at addr1 into the output register (we1 low).
  // The output register keeps its previous value during a write.
  always @(posedge clk)
    if (we1) ram[addr1] <= data1;
    else     rd_q1 <= ram[addr1];

  // Port 2: same behaviour, using we2 / addr2 / data2.
  // No arbitration between the two ports is implemented here.
  always @(posedge clk)
    if (we2) ram[addr2] <= data2;
    else     rd_q2 <= ram[addr2];

  assign out1 = rd_q1;
  assign out2 = rd_q2;
endmodule
//---------------------------------------
// A dual-port RAM rFIFO
// This module is tuned for VTR's benchmarks
//---------------------------------------
module dual_port_ram_rfifo (
  input clk,
  input we1,
  input we2,
  input [`rFIFOSIZEWIDTH - 1 : 0] addr1,
  input [`rFIFOINPUTWIDTH - 1 : 0] data1,
  output [`rFIFOINPUTWIDTH - 1 : 0] out1,
  input [`rFIFOSIZEWIDTH - 1 : 0] addr2,
  input [`rFIFOINPUTWIDTH - 1 : 0] data2,
  output [`rFIFOINPUTWIDTH - 1 : 0] out2
);
  // Storage: 2**`rFIFOSIZEWIDTH words, each `rFIFOINPUTWIDTH bits wide.
  reg [`rFIFOINPUTWIDTH - 1 : 0] ram[2**`rFIFOSIZEWIDTH - 1 : 0];

  // Registered read data, one register per port.
  reg [`rFIFOINPUTWIDTH - 1 : 0] rd_q1;
  reg [`rFIFOINPUTWIDTH - 1 : 0] rd_q2;

  // Port 1: on a clock edge either store data1 at addr1 (we1 high) or
  // capture the word at addr1 into the output register (we1 low).
  // The output register keeps its previous value during a write.
  always @(posedge clk)
    if (we1) ram[addr1] <= data1;
    else     rd_q1 <= ram[addr1];

  // Port 2: same behaviour, using we2 / addr2 / data2.
  // No arbitration between the two ports is implemented here.
  always @(posedge clk)
    if (we2) ram[addr2] <= data2;
    else     rd_q2 <= ram[addr2];

  assign out1 = rd_q1;
  assign out2 = rd_q2;
endmodule
//---------------------------------------
// A dual-port RAM wFIFO
// This module is tuned for VTR's benchmarks
//---------------------------------------
module dual_port_ram_wfifo (
  input clk,
  input we1,
  input we2,
  input [`wFIFOSIZEWIDTH - 1 : 0] addr1,
  input [`wFIFOINPUTWIDTH - 1 : 0] data1,
  output [`wFIFOINPUTWIDTH - 1 : 0] out1,
  input [`wFIFOSIZEWIDTH - 1 : 0] addr2,
  input [`wFIFOINPUTWIDTH - 1 : 0] data2,
  output [`wFIFOINPUTWIDTH - 1 : 0] out2
);
  // Storage: 2**`wFIFOSIZEWIDTH words, each `wFIFOINPUTWIDTH bits wide.
  reg [`wFIFOINPUTWIDTH - 1 : 0] ram[2**`wFIFOSIZEWIDTH - 1 : 0];

  // Registered read data, one register per port.
  reg [`wFIFOINPUTWIDTH - 1 : 0] rd_q1;
  reg [`wFIFOINPUTWIDTH - 1 : 0] rd_q2;

  // Port 1: on a clock edge either store data1 at addr1 (we1 high) or
  // capture the word at addr1 into the output register (we1 low).
  // The output register keeps its previous value during a write.
  always @(posedge clk)
    if (we1) ram[addr1] <= data1;
    else     rd_q1 <= ram[addr1];

  // Port 2: same behaviour, using we2 / addr2 / data2.
  // No arbitration between the two ports is implemented here.
  always @(posedge clk)
    if (we2) ram[addr2] <= data2;
    else     rd_q2 <= ram[addr2];

  assign out1 = rd_q1;
  assign out2 = rd_q2;
endmodule
//---------------------------------------
// A dual-port RAM aFIFO
// This module is tuned for VTR's benchmarks
//---------------------------------------
module dual_port_ram_afifo (
  input clk,
  input we1,
  input we2,
  input [`aFIFOSIZEWIDTH - 1 : 0] addr1,
  input [`aFIFOWIDTH - 1 : 0] data1,
  output [`aFIFOWIDTH - 1 : 0] out1,
  input [`aFIFOSIZEWIDTH - 1 : 0] addr2,
  input [`aFIFOWIDTH - 1 : 0] data2,
  output [`aFIFOWIDTH - 1 : 0] out2
);
  // Storage: 2**`aFIFOSIZEWIDTH words, each `aFIFOWIDTH bits wide.
  reg [`aFIFOWIDTH - 1 : 0] ram[2**`aFIFOSIZEWIDTH - 1 : 0];

  // Registered read data, one register per port.
  reg [`aFIFOWIDTH - 1 : 0] rd_q1;
  reg [`aFIFOWIDTH - 1 : 0] rd_q2;

  // Port 1: on a clock edge either store data1 at addr1 (we1 high) or
  // capture the word at addr1 into the output register (we1 low).
  // The output register keeps its previous value during a write.
  always @(posedge clk)
    if (we1) ram[addr1] <= data1;
    else     rd_q1 <= ram[addr1];

  // Port 2: same behaviour, using we2 / addr2 / data2.
  // No arbitration between the two ports is implemented here.
  always @(posedge clk)
    if (we2) ram[addr2] <= data2;
    else     rd_q2 <= ram[addr2];

  assign out1 = rd_q1;
  assign out2 = rd_q2;
endmodule
//---------------------------------------
// A dual-port RAM mFIFO
// This module is tuned for VTR's benchmarks
//---------------------------------------
module dual_port_ram_mfifo (
  input clk,
  input we1,
  input we2,
  input [`mFIFOSIZEWIDTH - 1 : 0] addr1,
  input [`mFIFOWIDTH - 1 : 0] data1,
  output [`mFIFOWIDTH - 1 : 0] out1,
  input [`mFIFOSIZEWIDTH - 1 : 0] addr2,
  input [`mFIFOWIDTH - 1 : 0] data2,
  output [`mFIFOWIDTH - 1 : 0] out2
);
  // Storage: 2**`mFIFOSIZEWIDTH words, each `mFIFOWIDTH bits wide.
  reg [`mFIFOWIDTH - 1 : 0] ram[2**`mFIFOSIZEWIDTH - 1 : 0];

  // Registered read data, one register per port.
  reg [`mFIFOWIDTH - 1 : 0] rd_q1;
  reg [`mFIFOWIDTH - 1 : 0] rd_q2;

  // Port 1: on a clock edge either store data1 at addr1 (we1 high) or
  // capture the word at addr1 into the output register (we1 low).
  // The output register keeps its previous value during a write.
  always @(posedge clk)
    if (we1) ram[addr1] <= data1;
    else     rd_q1 <= ram[addr1];

  // Port 2: same behaviour, using we2 / addr2 / data2.
  // No arbitration between the two ports is implemented here.
  always @(posedge clk)
    if (we2) ram[addr2] <= data2;
    else     rd_q2 <= ram[addr2];

  assign out1 = rd_q1;
  assign out2 = rd_q2;
endmodule