diff --git a/CHANGELOG b/CHANGELOG index ae7d28236..44d83c1bf 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,7 @@ Yosys 0.9 .. Yosys 0.9-dev - Added "synth_ice40 -abc9" (experimental) - Added "synth -abc9" (experimental) - Added "script -scriptwire + - "synth_xilinx" to now infer wide multiplexers (-widemux to enable) Yosys 0.8 .. Yosys 0.8-dev diff --git a/techlibs/xilinx/Makefile.inc b/techlibs/xilinx/Makefile.inc index e9ea10e48..17c5df37d 100644 --- a/techlibs/xilinx/Makefile.inc +++ b/techlibs/xilinx/Makefile.inc @@ -30,6 +30,7 @@ $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/drams_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/arith_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/ff_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/lut_map.v)) +$(eval $(call add_share_file,share/xilinx,techlibs/xilinx/mux_map.v)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/abc_xc7.box)) $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/abc_xc7.lut)) diff --git a/techlibs/xilinx/abc_xc7.box b/techlibs/xilinx/abc_xc7.box index 6dd71d758..3789ff350 100644 --- a/techlibs/xilinx/abc_xc7.box +++ b/techlibs/xilinx/abc_xc7.box @@ -3,17 +3,22 @@ # NB: Inputs/Outputs must be ordered alphabetically # (with exceptions for carry in/out) -# F7BMUX slower than F7AMUX +# Average across F7[AB]MUX # Inputs: I0 I1 S0 # Outputs: O -F7BMUX 1 1 3 1 -217 223 296 +F7MUX 1 1 3 1 +204 208 286 # Inputs: I0 I1 S0 # Outputs: O MUXF8 2 1 3 1 104 94 273 +# Inputs: I0 I1 I2 I3 S0 S1 +# Outputs: O +$__MUXF78 3 1 6 1 +294 297 311 317 390 273 + # CARRY4 + CARRY4_[ABCD]X # Inputs: CYINIT DI0 DI1 DI2 DI3 S0 S1 S2 S3 CI # Outputs: O0 O1 O2 O3 CO0 CO1 CO2 CO3 @@ -21,7 +26,7 @@ MUXF8 2 1 3 1 # input/output and the entire bus has been # moved there overriding the otherwise # alphabetical ordering) -CARRY4 3 1 10 8 +CARRY4 4 1 10 8 482 - - - - 223 - - - 222 598 407 - - - 400 205 - - 334 584 556 537 - - 523 558 226 - 239 @@ -34,20 +39,20 @@ CARRY4 3 1 10 8 # SLICEM/A6LUT # Inputs: A0 A1 A2 A3 A4 D DPRA0 DPRA1 DPRA2 DPRA3 DPRA4 WCLK WE # Outputs: DPO SPO -RAM32X1D 4 0 13 2 +RAM32X1D 5 0 13 2 - - - - - - 631 472 407 238 127 - - 631 472 407 238 127 - - - - - - - - # SLICEM/A6LUT # Inputs: A0 A1 A2 A3 A4 A5 D DPRA0 DPRA1 DPRA2 DPRA3 DPRA4 DPRA5 WCLK WE # Outputs: DPO SPO -RAM64X1D 5 0 15 2 +RAM64X1D 6 0 15 2 - - - - - - - 642 631 472 407 238 127 - - 642 631 472 407 238 127 - - - - - - - - - # SLICEM/A6LUT + F7[AB]MUX # Inputs: A0 A1 A2 A3 A4 A5 A6 D DPRA0 DPRA1 DPRA2 DPRA3 DPRA4 DPRA5 DPRA6 WCLK WE # Outputs: DPO SPO -RAM128X1D 6 0 17 2 +RAM128X1D 7 0 17 2 - - - - - - - - 1009 998 839 774 605 494 450 - - 1047 1036 877 812 643 532 478 - - - - - - - - - - diff --git a/techlibs/xilinx/cells_map.v b/techlibs/xilinx/cells_map.v index 89671c0fc..2eb9fa2c1 100644 --- a/techlibs/xilinx/cells_map.v +++ b/techlibs/xilinx/cells_map.v @@ -93,11 +93,8 @@ module \$__XILINX_SHREG_ (input C, input D, input [31:0] L, input E, output Q, o \$__XILINX_SHREG_ #(.DEPTH(DEPTH-64), .INIT(INIT[DEPTH-64-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_2 (.C(C), .D(T3), .L(L[4:0]), .E(E), .Q(T4)); if (&_TECHMAP_CONSTMSK_L_) assign Q = T4; - else begin - MUXF7 fpga_mux_0 (.O(T5), .I0(T0), .I1(T2), .S(L[5])); - MUXF7 fpga_mux_1 (.O(T6), .I0(T4), .I1(1'b0 /* unused */), .S(L[5])); - MUXF8 fpga_mux_2 (.O(Q), .I0(T5), .I1(T6), .S(L[6])); - end + else + \$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T4), .I3(1'bx), .S0(L[5]), .S1(L[6]), .O(Q)); end else if (DEPTH > 97 && DEPTH < 128) begin wire T0, T1, T2, T3, T4, T5, T6, T7, T8; @@ -107,11 +104,8 @@ module \$__XILINX_SHREG_ (input C, input D, input [31:0] L, input E, output Q, o \$__XILINX_SHREG_ #(.DEPTH(DEPTH-96), .INIT(INIT[DEPTH-96-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_3 (.C(C), .D(T5), .L(L[4:0]), .E(E), .Q(T6)); if (&_TECHMAP_CONSTMSK_L_) assign Q = T6; - else begin - MUXF7 fpga_mux_0 (.O(T7), .I0(T0), .I1(T2), .S(L[5])); - MUXF7 fpga_mux_1 (.O(T8), .I0(T4), .I1(T6), .S(L[5])); - MUXF8 fpga_mux_2 (.O(Q), .I0(T7), .I1(T8), .S(L[6])); - end + else + \$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T4), .I3(T6), .S0(L[5]), .S1(L[6]), .O(Q)); end else if (DEPTH == 128) begin wire T0, T1, T2, T3, T4, T5, T6; @@ -121,12 +115,8 @@ module \$__XILINX_SHREG_ (input C, input D, input [31:0] L, input E, output Q, o SRLC32E #(.INIT(INIT_R[128-1:96]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_3 (.A(L[4:0]), .CE(CE), .CLK(C), .D(T5), .Q(T6), .Q31(SO)); if (&_TECHMAP_CONSTMSK_L_) assign Q = T6; - else begin - wire T7, T8; - MUXF7 fpga_mux_0 (.O(T7), .I0(T0), .I1(T2), .S(L[5])); - MUXF7 fpga_mux_1 (.O(T8), .I0(T4), .I1(T6), .S(L[5])); - MUXF8 fpga_mux_2 (.O(Q), .I0(T7), .I1(T8), .S(L[6])); - end + else + \$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T4), .I3(T6), .S0(L[5]), .S1(L[6]), .O(Q)); end // For fixed length, if just 1 over a convenient value, decompose else if (DEPTH <= 129 && &_TECHMAP_CONSTMSK_L_) begin @@ -158,3 +148,220 @@ module \$__XILINX_SHREG_ (input C, input D, input [31:0] L, input E, output Q, o end endgenerate endmodule + +`ifdef MIN_MUX_INPUTS +module \$__XILINX_SHIFTX (A, B, Y); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 1; + parameter B_WIDTH = 1; + parameter Y_WIDTH = 1; + + input [A_WIDTH-1:0] A; + input [B_WIDTH-1:0] B; + output [Y_WIDTH-1:0] Y; + + parameter [A_WIDTH-1:0] _TECHMAP_CONSTMSK_A_ = 0; + parameter [A_WIDTH-1:0] _TECHMAP_CONSTVAL_A_ = 0; + parameter [B_WIDTH-1:0] _TECHMAP_CONSTMSK_B_ = 0; + parameter [B_WIDTH-1:0] _TECHMAP_CONSTVAL_B_ = 0; + + function integer A_WIDTH_trimmed; + input integer start; + begin + A_WIDTH_trimmed = start; + while (A_WIDTH_trimmed > 0 && _TECHMAP_CONSTMSK_A_[A_WIDTH_trimmed-1] && _TECHMAP_CONSTVAL_A_[A_WIDTH_trimmed-1] === 1'bx) + A_WIDTH_trimmed = A_WIDTH_trimmed - 1; + end + endfunction + + generate + genvar i, j; + // Bit-blast + if (Y_WIDTH > 1) begin + for (i = 0; i < Y_WIDTH; i++) + \$__XILINX_SHIFTX #(.A_SIGNED(A_SIGNED), .B_SIGNED(B_SIGNED), .A_WIDTH(A_WIDTH-Y_WIDTH+1), .B_WIDTH(B_WIDTH), .Y_WIDTH(1'd1)) bitblast (.A(A[A_WIDTH-Y_WIDTH+i:i]), .B(B), .Y(Y[i])); + end + // If the LSB of B is constant zero (and Y_WIDTH is 1) then + // we can optimise by removing every other entry from A + // and popping the constant zero from B + else if (_TECHMAP_CONSTMSK_B_[0] && !_TECHMAP_CONSTVAL_B_[0]) begin + wire [(A_WIDTH+1)/2-1:0] A_i; + for (i = 0; i < (A_WIDTH+1)/2; i++) + assign A_i[i] = A[i*2]; + \$__XILINX_SHIFTX #(.A_SIGNED(A_SIGNED), .B_SIGNED(B_SIGNED), .A_WIDTH((A_WIDTH+1'd1)/2'd2), .B_WIDTH(B_WIDTH-1'd1), .Y_WIDTH(Y_WIDTH)) _TECHMAP_REPLACE_ (.A(A_i), .B(B[B_WIDTH-1:1]), .Y(Y)); + end + // Trim off any leading 1'bx -es in A + else if (_TECHMAP_CONSTMSK_A_[A_WIDTH-1] && _TECHMAP_CONSTVAL_A_[A_WIDTH-1] === 1'bx) begin + localparam A_WIDTH_new = A_WIDTH_trimmed(A_WIDTH-1); + \$__XILINX_SHIFTX #(.A_SIGNED(A_SIGNED), .B_SIGNED(B_SIGNED), .A_WIDTH(A_WIDTH_new), .B_WIDTH(B_WIDTH), .Y_WIDTH(Y_WIDTH)) _TECHMAP_REPLACE_ (.A(A[A_WIDTH_new-1:0]), .B(B), .Y(Y)); + end + else if (A_WIDTH < `MIN_MUX_INPUTS) begin + wire _TECHMAP_FAIL_ = 1; + end + else if (A_WIDTH == 2) begin + MUXF7 fpga_hard_mux (.I0(A[0]), .I1(A[1]), .S(B[0]), .O(Y)); + end + else if (A_WIDTH <= 4) begin + wire [4-1:0] Ax; + if (A_WIDTH == 4) + assign Ax = A; + else + // Rather than extend with 1'bx which gets flattened to 1'b0 + // causing the "don't care" status to get lost, extend with + // the same driver of F7B.I0 so that we can optimise F7B away + // later + assign Ax = {A[1], A}; + \$__XILINX_MUXF78 fpga_hard_mux (.I0(Ax[0]), .I1(Ax[2]), .I2(Ax[1]), .I3(Ax[3]), .S0(B[1]), .S1(B[0]), .O(Y)); + end + // Note that the following decompositions are 'backwards' in that + // the LSBs are placed on the hard resources, and the soft resources + // are used for MSBs. + // This has the effect of more effectively utilising the hard mux; + // take for example a 5:1 multiplexer, currently this would map as: + // + // A[0] \___ __ A[0] \__ __ + // A[4] / \| \ whereas the more A[1] / \| \ + // A[1] _____| | obvious mapping A[2] \___| | + // A[2] _____| |-- of MSBs to hard A[3] / | |__ + // A[3]______| | resources would A[4] ____| | + // |__/ lead to: 1'bx ____| | + // || |__/ + // || || + // B[1:0] B[1:2] + // + // Expectation would be that the 'forward' mapping (right) is more + // area efficient (consider a 9:1 multiplexer using 2x4:1 multiplexers + // on its I0 and I1 inputs, and A[8] and 1'bx on its I2 and I3 inputs) + // but that the 'backwards' mapping (left) is more delay efficient + // since smaller LUTs are faster than wider ones. + else if (A_WIDTH <= 8) begin + wire [8-1:0] Ax = {{{8-A_WIDTH}{1'bx}}, A}; + wire T0 = B[2] ? Ax[4] : Ax[0]; + wire T1 = B[2] ? Ax[5] : Ax[1]; + wire T2 = B[2] ? Ax[6] : Ax[2]; + wire T3 = B[2] ? Ax[7] : Ax[3]; + \$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T1), .I3(T3), .S0(B[1]), .S1(B[0]), .O(Y)); + end + else if (A_WIDTH <= 16) begin + wire [16-1:0] Ax = {{{16-A_WIDTH}{1'bx}}, A}; + wire T0 = B[2] ? B[3] ? Ax[12] : Ax[4] + : B[3] ? Ax[ 8] : Ax[0]; + wire T1 = B[2] ? B[3] ? Ax[13] : Ax[5] + : B[3] ? Ax[ 9] : Ax[1]; + wire T2 = B[2] ? B[3] ? Ax[14] : Ax[6] + : B[3] ? Ax[10] : Ax[2]; + wire T3 = B[2] ? B[3] ? Ax[15] : Ax[7] + : B[3] ? Ax[11] : Ax[3]; + \$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T1), .I3(T3), .S0(B[1]), .S1(B[0]), .O(Y)); + end + else begin + localparam num_mux16 = (A_WIDTH+15) / 16; + localparam clog2_num_mux16 = $clog2(num_mux16); + wire [num_mux16-1:0] T; + wire [num_mux16*16-1:0] Ax = {{(num_mux16*16-A_WIDTH){1'bx}}, A}; + for (i = 0; i < num_mux16; i++) + \$__XILINX_SHIFTX #( + .A_SIGNED(A_SIGNED), + .B_SIGNED(B_SIGNED), + .A_WIDTH(16), + .B_WIDTH(4), + .Y_WIDTH(Y_WIDTH) + ) fpga_mux ( + .A(Ax[i*16+:16]), + .B(B[3:0]), + .Y(T[i]) + ); + \$__XILINX_SHIFTX #( + .A_SIGNED(A_SIGNED), + .B_SIGNED(B_SIGNED), + .A_WIDTH(num_mux16), + .B_WIDTH(clog2_num_mux16), + .Y_WIDTH(Y_WIDTH) + ) _TECHMAP_REPLACE_ ( + .A(T), + .B(B[B_WIDTH-1-:clog2_num_mux16]), + .Y(Y)); + end + endgenerate +endmodule + +(* techmap_celltype = "$__XILINX_SHIFTX" *) +module _90__XILINX_SHIFTX (A, B, Y); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 1; + parameter B_WIDTH = 1; + parameter Y_WIDTH = 1; + + input [A_WIDTH-1:0] A; + input [B_WIDTH-1:0] B; + output [Y_WIDTH-1:0] Y; + + \$shiftx #(.A_SIGNED(A_SIGNED), .B_SIGNED(B_SIGNED), .A_WIDTH(A_WIDTH), .B_WIDTH(B_WIDTH), .Y_WIDTH(Y_WIDTH)) _TECHMAP_REPLACE_ (.A(A), .B(B), .Y(Y)); +endmodule + +module \$_MUX_ (A, B, S, Y); + input A, B, S; + output Y; + generate + if (`MIN_MUX_INPUTS == 2) + \$__XILINX_SHIFTX #(.A_SIGNED(0), .B_SIGNED(0), .A_WIDTH(2), .B_WIDTH(1), .Y_WIDTH(1)) _TECHMAP_REPLACE_ (.A({B,A}), .B(S), .Y(Y)); + else + wire _TECHMAP_FAIL_ = 1; + endgenerate +endmodule + +module \$_MUX4_ (A, B, C, D, S, T, Y); + input A, B, C, D, S, T; + output Y; + \$__XILINX_SHIFTX #(.A_SIGNED(0), .B_SIGNED(0), .A_WIDTH(4), .B_WIDTH(2), .Y_WIDTH(1)) _TECHMAP_REPLACE_ (.A({D,C,B,A}), .B({T,S}), .Y(Y)); +endmodule + +module \$_MUX8_ (A, B, C, D, E, F, G, H, S, T, U, Y); + input A, B, C, D, E, F, G, H, S, T, U; + output Y; + \$__XILINX_SHIFTX #(.A_SIGNED(0), .B_SIGNED(0), .A_WIDTH(8), .B_WIDTH(3), .Y_WIDTH(1)) _TECHMAP_REPLACE_ (.A({H,G,F,E,D,C,B,A}), .B({U,T,S}), .Y(Y)); +endmodule + +module \$_MUX16_ (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, S, T, U, V, Y); + input A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, S, T, U, V; + output Y; + \$__XILINX_SHIFTX #(.A_SIGNED(0), .B_SIGNED(0), .A_WIDTH(16), .B_WIDTH(4), .Y_WIDTH(1)) _TECHMAP_REPLACE_ (.A({P,O,N,M,L,K,J,I,H,G,F,E,D,C,B,A}), .B({V,U,T,S}), .Y(Y)); +endmodule +`endif + +`ifndef _ABC +module \$__XILINX_MUXF78 (O, I0, I1, I2, I3, S0, S1); + output O; + input I0, I1, I2, I3, S0, S1; + wire T0, T1; + parameter _TECHMAP_BITS_CONNMAP_ = 0; + parameter [_TECHMAP_BITS_CONNMAP_-1:0] _TECHMAP_CONNMAP_I0_ = 0; + parameter [_TECHMAP_BITS_CONNMAP_-1:0] _TECHMAP_CONNMAP_I1_ = 0; + parameter [_TECHMAP_BITS_CONNMAP_-1:0] _TECHMAP_CONNMAP_I2_ = 0; + parameter [_TECHMAP_BITS_CONNMAP_-1:0] _TECHMAP_CONNMAP_I3_ = 0; + parameter _TECHMAP_CONSTMSK_S0_ = 0; + parameter _TECHMAP_CONSTVAL_S0_ = 0; + parameter _TECHMAP_CONSTMSK_S1_ = 0; + parameter _TECHMAP_CONSTVAL_S1_ = 0; + if (_TECHMAP_CONSTMSK_S0_ && _TECHMAP_CONSTVAL_S0_ === 1'b1) + assign T0 = I1; + else if (_TECHMAP_CONSTMSK_S0_ || _TECHMAP_CONNMAP_I0_ === _TECHMAP_CONNMAP_I1_) + assign T0 = I0; + else + MUXF7 mux7a (.I0(I0), .I1(I1), .S(S0), .O(T0)); + if (_TECHMAP_CONSTMSK_S0_ && _TECHMAP_CONSTVAL_S0_ === 1'b1) + assign T1 = I3; + else if (_TECHMAP_CONSTMSK_S0_ || _TECHMAP_CONNMAP_I2_ === _TECHMAP_CONNMAP_I3_) + assign T1 = I2; + else + MUXF7 mux7b (.I0(I2), .I1(I3), .S(S0), .O(T1)); + if (_TECHMAP_CONSTMSK_S1_ && _TECHMAP_CONSTVAL_S1_ === 1'b1) + assign O = T1; + else if (_TECHMAP_CONSTMSK_S1_ || (_TECHMAP_CONNMAP_I0_ === _TECHMAP_CONNMAP_I1_ && _TECHMAP_CONNMAP_I1_ === _TECHMAP_CONNMAP_I2_ && _TECHMAP_CONNMAP_I2_ === _TECHMAP_CONNMAP_I3_)) + assign O = T0; + else + MUXF8 mux8 (.I0(T0), .I1(T1), .S(S1), .O(O)); +endmodule +`endif diff --git a/techlibs/xilinx/cells_sim.v b/techlibs/xilinx/cells_sim.v index 5a148be01..3937d3536 100644 --- a/techlibs/xilinx/cells_sim.v +++ b/techlibs/xilinx/cells_sim.v @@ -169,11 +169,19 @@ module MUXF8(output O, input I0, I1, S); assign O = S ? I1 : I0; endmodule +`ifdef _ABC +(* abc_box_id = 3, lib_whitebox *) +module \$__XILINX_MUXF78 (output O, input I0, I1, I2, I3, S0, S1); + assign O = S1 ? (S0 ? I3 : I2) + : (S0 ? I1 : I0); +endmodule +`endif + module XORCY(output O, input CI, LI); assign O = CI ^ LI; endmodule -(* abc_box_id = 3, abc_carry="CI,CO", lib_whitebox *) +(* abc_box_id = 4, abc_carry="CI,CO", lib_whitebox *) module CARRY4(output [3:0] CO, O, input CI, CYINIT, input [3:0] DI, S); assign O = S ^ {CO[2:0], CI | CYINIT}; assign CO[0] = S[0] ? CI | CYINIT : DI[0]; @@ -281,7 +289,7 @@ module FDPE_1 (output reg Q, input C, CE, D, PRE); always @(negedge C, posedge PRE) if (PRE) Q <= 1'b1; else if (CE) Q <= D; endmodule -(* abc_box_id = 4, abc_scc_break="D,WE" *) +(* abc_box_id = 5, abc_scc_break="D,WE" *) module RAM32X1D ( output DPO, SPO, input D, WCLK, WE, @@ -299,7 +307,7 @@ module RAM32X1D ( always @(posedge clk) if (WE) mem[a] <= D; endmodule -(* abc_box_id = 5, abc_scc_break="D,WE" *) +(* abc_box_id = 6, abc_scc_break="D,WE" *) module RAM64X1D ( output DPO, SPO, input D, WCLK, WE, @@ -317,7 +325,7 @@ module RAM64X1D ( always @(posedge clk) if (WE) mem[a] <= D; endmodule -(* abc_box_id = 6, abc_scc_break="D,WE" *) +(* abc_box_id = 7, abc_scc_break="D,WE" *) module RAM128X1D ( output DPO, SPO, input D, WCLK, WE, diff --git a/techlibs/xilinx/mux_map.v b/techlibs/xilinx/mux_map.v new file mode 100644 index 000000000..91aaf2118 --- /dev/null +++ b/techlibs/xilinx/mux_map.v @@ -0,0 +1,71 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2012 Clifford Wolf + * 2019 Eddie Hung + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +// The purpose of these mapping rules is to allow preserve all (sufficiently +// wide) $shiftx cells during 'techmap' so that they can be mapped to hard +// resources, rather than being bit-blasted to gates during 'techmap' +// execution + +module \$shiftx (A, B, Y); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 1; + parameter B_WIDTH = 1; + parameter Y_WIDTH = 1; + + input [A_WIDTH-1:0] A; + input [B_WIDTH-1:0] B; + output [Y_WIDTH-1:0] Y; + + parameter [B_WIDTH-1:0] _TECHMAP_CONSTMSK_B_ = 0; + parameter [B_WIDTH-1:0] _TECHMAP_CONSTVAL_B_ = 0; + + generate + if (B_SIGNED) begin + if (_TECHMAP_CONSTMSK_B_[B_WIDTH-1] && (_TECHMAP_CONSTVAL_B_[B_WIDTH-1] == 1'b0 || _TECHMAP_CONSTVAL_B_[B_WIDTH-1] === 1'bx)) + // Optimisation to remove B_SIGNED if sign bit of B is constant-0 + \$shiftx #( + .A_SIGNED(A_SIGNED), + .B_SIGNED(0), + .A_WIDTH(A_WIDTH), + .B_WIDTH(B_WIDTH-1'd1), + .Y_WIDTH(Y_WIDTH) + ) _TECHMAP_REPLACE_ ( + .A(A), .B(B[B_WIDTH-2:0]), .Y(Y) + ); + else + wire _TECHMAP_FAIL_ = 1; + end + else begin + if (((A_WIDTH + Y_WIDTH - 1) / Y_WIDTH) < `MIN_MUX_INPUTS) + wire _TECHMAP_FAIL_ = 1; + else + \$__XILINX_SHIFTX #( + .A_SIGNED(A_SIGNED), + .B_SIGNED(B_SIGNED), + .A_WIDTH(A_WIDTH), + .B_WIDTH(B_WIDTH), + .Y_WIDTH(Y_WIDTH) + ) _TECHMAP_REPLACE_ ( + .A(A), .B(B), .Y(Y) + ); + end + endgenerate +endmodule diff --git a/techlibs/xilinx/synth_xilinx.cc b/techlibs/xilinx/synth_xilinx.cc index b7c32d2e0..ef7660288 100644 --- a/techlibs/xilinx/synth_xilinx.cc +++ b/techlibs/xilinx/synth_xilinx.cc @@ -2,6 +2,7 @@ * yosys -- Yosys Open SYnthesis Suite * * Copyright (C) 2012 Clifford Wolf + * (C) 2019 Eddie Hung * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -77,6 +78,11 @@ struct SynthXilinxPass : public ScriptPass log(" -nowidelut\n"); log(" do not use MUXF[78] resources to implement LUTs larger than LUT6s\n"); log("\n"); + log(" -widemux \n"); + log(" enable inference of hard multiplexer resources (MUXF[78]) for muxes at or\n"); + log(" above this number of inputs (minimum value 2, recommended value >= 5).\n"); + log(" default: 0 (no inference)\n"); + log("\n"); log(" -run :\n"); log(" only run the commands between the labels (see below). an empty\n"); log(" from label is synonymous to 'begin', and empty to label is\n"); @@ -99,6 +105,7 @@ struct SynthXilinxPass : public ScriptPass std::string top_opt, edif_file, blif_file, family; bool flatten, retime, vpr, nobram, nodram, nosrl, nocarry, nowidelut, abc9; + int widemux; void clear_flags() YS_OVERRIDE { @@ -116,6 +123,7 @@ struct SynthXilinxPass : public ScriptPass nocarry = false; nowidelut = false; abc9 = false; + widemux = 0; } void execute(std::vector args, RTLIL::Design *design) YS_OVERRIDE @@ -186,6 +194,10 @@ struct SynthXilinxPass : public ScriptPass nosrl = true; continue; } + if (args[argidx] == "-widemux" && argidx+1 < args.size()) { + widemux = std::stoi(args[++argidx]); + continue; + } if (args[argidx] == "-abc9") { abc9 = true; continue; @@ -197,6 +209,9 @@ struct SynthXilinxPass : public ScriptPass if (family != "xcup" && family != "xcu" && family != "xc7" && family != "xc6s") log_cmd_error("Invalid Xilinx -family setting: %s\n", family.c_str()); + if (widemux != 0 && widemux < 2) + log_cmd_error("-widemux value must be 0 or >= 2.\n"); + if (!design->full_selection()) log_cmd_error("This command only operates on fully selected designs!\n"); @@ -212,9 +227,9 @@ struct SynthXilinxPass : public ScriptPass { if (check_label("begin")) { if (vpr) - run("read_verilog -lib -D _ABC -D_EXPLICIT_CARRY +/xilinx/cells_sim.v"); + run("read_verilog -lib -icells -D _ABC -D_EXPLICIT_CARRY +/xilinx/cells_sim.v"); else - run("read_verilog -lib -D _ABC +/xilinx/cells_sim.v"); + run("read_verilog -lib -icells -D _ABC +/xilinx/cells_sim.v"); run("read_verilog -lib +/xilinx/cells_xtra.v"); @@ -224,21 +239,41 @@ struct SynthXilinxPass : public ScriptPass run(stringf("hierarchy -check %s", top_opt.c_str())); } - if (check_label("flatten", "(with '-flatten' only)")) { - if (flatten || help_mode) { - run("proc"); - run("flatten"); - } - } - if (check_label("coarse")) { - run("synth -run coarse"); + run("proc"); + if (help_mode || flatten) + run("flatten", "(if -flatten)"); + run("opt_expr"); + run("opt_clean"); + run("check"); + run("opt"); + if (help_mode) + run("wreduce [-keepdc]", "(option for '-widemux')"); + else + run("wreduce" + std::string(widemux > 0 ? " -keepdc" : "")); + run("peepopt"); + run("opt_clean"); + + if (widemux > 0 || help_mode) + run("muxpack", " ('-widemux' only)"); // shregmap -tech xilinx can cope with $shiftx and $mux // cells for identifying variable-length shift registers, // so attempt to convert $pmux-es to the former - if (!nosrl || help_mode) - run("pmux2shiftx", "(skip if '-nosrl')"); + // Also: wide multiplexer inference benefits from this too + if (!(nosrl && widemux == 0) || help_mode) { + run("pmux2shiftx", "(skip if '-nosrl' and '-widemux=0')"); + run("clean", " (skip if '-nosrl' and '-widemux=0')"); + } + + run("techmap -map +/cmp2lut.v -D LUT_WIDTH=6"); + run("alumacc"); + run("share"); + run("opt"); + run("fsm"); + run("opt -fast"); + run("memory -nomap"); + run("opt_clean"); } if (check_label("bram", "(skip if '-nobram')")) { @@ -256,43 +291,81 @@ struct SynthXilinxPass : public ScriptPass } if (check_label("fine")) { - run("opt -fast -full"); + if (widemux > 0) + run("opt -fast -mux_bool -undriven -fine"); // Necessary to omit -mux_undef otherwise muxcover + // performs less efficiently + else + run("opt -fast -full"); run("memory_map"); run("dffsr2dff"); run("dff2dffe"); + if (help_mode) { + run("simplemap t:$mux", " ('-widemux' only)"); + run("muxcover , ('-widemux' only)"); + } + else if (widemux > 0) { + run("simplemap t:$mux"); + constexpr int cost_mux2 = 100; + std::string muxcover_args = stringf(" -nodecode -mux2=%d", cost_mux2); + switch (widemux) { + case 2: muxcover_args += stringf(" -mux4=%d -mux8=%d -mux16=%d", cost_mux2+1, cost_mux2+2, cost_mux2+3); break; + case 3: + case 4: muxcover_args += stringf(" -mux4=%d -mux8=%d -mux16=%d", cost_mux2*(widemux-1)-2, cost_mux2*(widemux-1)-1, cost_mux2*(widemux-1)); break; + case 5: + case 6: + case 7: + case 8: muxcover_args += stringf(" -mux8=%d -mux16=%d", cost_mux2*(widemux-1)-1, cost_mux2*(widemux-1)); break; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + default: muxcover_args += stringf(" -mux16=%d", cost_mux2*(widemux-1)-1); break; + } + run("muxcover " + muxcover_args); + } run("opt -full"); if (!nosrl || help_mode) { // shregmap operates on bit-level flops, not word-level, // so break those down here - run("simplemap t:$dff t:$dffe", "(skip if '-nosrl')"); + run("simplemap t:$dff t:$dffe", " (skip if '-nosrl')"); // shregmap with '-tech xilinx' infers variable length shift regs run("shregmap -tech xilinx -minlen 3", "(skip if '-nosrl')"); } - std::string techmap_files = " -map +/techmap.v"; + std::string techmap_args = " -map +/techmap.v"; if (help_mode) - techmap_files += " [-map +/xilinx/arith_map.v]"; + techmap_args += " [-map +/xilinx/mux_map.v]"; + else if (widemux > 0) + techmap_args += stringf(" -D MIN_MUX_INPUTS=%d -map +/xilinx/mux_map.v", widemux); + if (help_mode) + techmap_args += " [-map +/xilinx/arith_map.v]"; else if (!nocarry) { - techmap_files += " -map +/xilinx/arith_map.v"; + techmap_args += " -map +/xilinx/arith_map.v"; if (vpr) - techmap_files += " -D _EXPLICIT_CARRY"; + techmap_args += " -D _EXPLICIT_CARRY"; else if (abc9) - techmap_files += " -D _CLB_CARRY"; + techmap_args += " -D _CLB_CARRY"; } - run("techmap " + techmap_files); + run("techmap " + techmap_args); run("opt -fast"); } if (check_label("map_cells")) { - run("techmap -map +/techmap.v -map +/xilinx/cells_map.v"); + std::string techmap_args = "-map +/techmap.v -D _ABC -map +/xilinx/cells_map.v"; + if (widemux > 0) + techmap_args += stringf(" -D MIN_MUX_INPUTS=%d", widemux); + run("techmap " + techmap_args); run("clean"); } if (check_label("map_luts")) { run("opt_expr -mux_undef"); if (help_mode) - run("abc -luts 2:2,3,6:5[,10,20] [-dff]", "(skip if 'nowidelut', only for '-retime')"); + run("abc -luts 2:2,3,6:5[,10,20] [-dff]", "(option for 'nowidelut', option for '-retime')"); else if (abc9) { if (family != "xc7") log_warning("'synth_xilinx -abc9' currently supports '-family xc7' only.\n");