xilinx: Initial support for LUT4 devices.

Adds support for mapping logic, including LUTs, wide LUTs, and carry
chains.

Fixes #1547
This commit is contained in:
Marcin Kościelnicki 2020-02-03 16:19:24 +01:00 committed by Marcelina Kościelnicka
parent 1f54b0008f
commit d48950d92d
6 changed files with 235 additions and 54 deletions

View File

@ -34,6 +34,12 @@ module _80_xilinx_lcu (P, G, CI, CO);
genvar i;
`ifdef _EXPLICIT_CARRY
localparam EXPLICIT_CARRY = 1'b1;
`else
localparam EXPLICIT_CARRY = 1'b0;
`endif
generate if (EXPLICIT_CARRY || `LUT_SIZE == 4) begin
wire [WIDTH-1:0] C = {CO, CI};
wire [WIDTH-1:0] S = P & ~G;
@ -47,7 +53,7 @@ module _80_xilinx_lcu (P, G, CI, CO);
);
end endgenerate
`else
end else begin
localparam CARRY4_COUNT = (WIDTH + 3) / 4;
localparam MAX_WIDTH = CARRY4_COUNT * 4;
@ -79,7 +85,7 @@ module _80_xilinx_lcu (P, G, CI, CO);
);
end
end endgenerate
`endif
end endgenerate
endmodule
@ -116,9 +122,34 @@ module _80_xilinx_alu (A, B, CI, BI, X, Y, CO);
genvar i;
`ifdef _EXPLICIT_CARRY
localparam EXPLICIT_CARRY = 1'b1;
`else
localparam EXPLICIT_CARRY = 1'b0;
`endif
generate if (`LUT_SIZE == 4) begin
wire [Y_WIDTH-1:0] C = {CO, CI};
wire [Y_WIDTH-1:0] S = {AA ^ BB};
genvar i;
generate for (i = 0; i < Y_WIDTH; i = i + 1) begin:slice
MUXCY muxcy (
.CI(C[i]),
.DI(AA[i]),
.S(S[i]),
.O(CO[i])
);
XORCY xorcy (
.CI(C[i]),
.LI(S[i]),
.O(Y[i])
);
end endgenerate
end else if (EXPLICIT_CARRY) begin
wire [Y_WIDTH-1:0] S = AA ^ BB;
wire [Y_WIDTH-1:0] DI = AA & BB;
wire CINIT;
// Carry chain.
@ -138,7 +169,7 @@ module _80_xilinx_alu (A, B, CI, BI, X, Y, CO);
generate for (i = 0; i < 1; i = i + 1) begin:slice
CARRY0 #(.CYINIT_FABRIC(1)) carry(
.CI_INIT(CI),
.DI(DI[0]),
.DI(AA[0]),
.S(S[0]),
.CO_CHAIN(CO_CHAIN[0]),
.CO_FABRIC(CO[0]),
@ -150,7 +181,7 @@ module _80_xilinx_alu (A, B, CI, BI, X, Y, CO);
if(i % 4 == 0) begin
CARRY0 carry (
.CI(C[i]),
.DI(DI[i]),
.DI(AA[i]),
.S(S[i]),
.CO_CHAIN(CO_CHAIN[i]),
.CO_FABRIC(CO[i]),
@ -161,7 +192,7 @@ module _80_xilinx_alu (A, B, CI, BI, X, Y, CO);
begin
CARRY carry (
.CI(C[i]),
.DI(DI[i]),
.DI(AA[i]),
.S(S[i]),
.CO_CHAIN(CO_CHAIN[i]),
.CO_FABRIC(CO[i]),
@ -174,7 +205,7 @@ module _80_xilinx_alu (A, B, CI, BI, X, Y, CO);
if(i % 4 == 0) begin
CARRY0 top_of_carry (
.CI(C[i]),
.DI(DI[i]),
.DI(AA[i]),
.S(S[i]),
.CO_CHAIN(CO_CHAIN[i]),
.O(Y[i])
@ -184,7 +215,7 @@ module _80_xilinx_alu (A, B, CI, BI, X, Y, CO);
begin
CARRY top_of_carry (
.CI(C[i]),
.DI(DI[i]),
.DI(AA[i]),
.S(S[i]),
.CO_CHAIN(CO_CHAIN[i]),
.O(Y[i])
@ -213,14 +244,14 @@ module _80_xilinx_alu (A, B, CI, BI, X, Y, CO);
end
end endgenerate
`else
end else begin
localparam CARRY4_COUNT = (Y_WIDTH + 3) / 4;
localparam MAX_WIDTH = CARRY4_COUNT * 4;
localparam PAD_WIDTH = MAX_WIDTH - Y_WIDTH;
wire [MAX_WIDTH-1:0] S = {{PAD_WIDTH{1'b0}}, AA ^ BB};
wire [MAX_WIDTH-1:0] DI = {{PAD_WIDTH{1'b0}}, AA & BB};
wire [MAX_WIDTH-1:0] DI = {{PAD_WIDTH{1'b0}}, AA};
wire [MAX_WIDTH-1:0] O;
wire [MAX_WIDTH-1:0] C;
@ -251,7 +282,7 @@ module _80_xilinx_alu (A, B, CI, BI, X, Y, CO);
end
end endgenerate
`endif
end endgenerate
assign X = S;
endmodule

View File

@ -51,43 +51,45 @@ module \$lut (A, Y);
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]));
end else
if (WIDTH == 5) begin
if (WIDTH == 5 && WIDTH <= `LUT_WIDTH) begin
LUT5 #(.INIT(LUT)) _TECHMAP_REPLACE_ (.O(Y),
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]), .I4(A[4]));
end else
if (WIDTH == 6) begin
if (WIDTH == 6 && WIDTH <= `LUT_WIDTH) begin
LUT6 #(.INIT(LUT)) _TECHMAP_REPLACE_ (.O(Y),
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]), .I4(A[4]), .I5(A[5]));
end else
if (WIDTH == 5 && WIDTH > `LUT_WIDTH) begin
wire f0, f1;
\$lut #(.LUT(LUT[15: 0]), .WIDTH(4)) lut0 (.A(A[3:0]), .Y(f0));
\$lut #(.LUT(LUT[31:16]), .WIDTH(4)) lut1 (.A(A[3:0]), .Y(f1));
MUXF5 mux5(.I0(f0), .I1(f1), .S(A[4]), .O(Y));
end else
if (WIDTH == 6 && WIDTH > `LUT_WIDTH) begin
wire f0, f1;
\$lut #(.LUT(LUT[31: 0]), .WIDTH(5)) lut0 (.A(A[4:0]), .Y(f0));
\$lut #(.LUT(LUT[63:32]), .WIDTH(5)) lut1 (.A(A[4:0]), .Y(f1));
MUXF6 mux6(.I0(f0), .I1(f1), .S(A[5]), .O(Y));
end else
if (WIDTH == 7) begin
wire T0, T1;
LUT6 #(.INIT(LUT[63:0])) fpga_lut_0 (.O(T0),
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]), .I4(A[4]), .I5(A[5]));
LUT6 #(.INIT(LUT[127:64])) fpga_lut_1 (.O(T1),
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]), .I4(A[4]), .I5(A[5]));
MUXF7 fpga_mux_0 (.O(Y), .I0(T0), .I1(T1), .S(A[6]));
wire f0, f1;
\$lut #(.LUT(LUT[ 63: 0]), .WIDTH(6)) lut0 (.A(A[5:0]), .Y(f0));
\$lut #(.LUT(LUT[127:64]), .WIDTH(6)) lut1 (.A(A[5:0]), .Y(f1));
MUXF7 mux7(.I0(f0), .I1(f1), .S(A[6]), .O(Y));
end else
if (WIDTH == 8) begin
wire T0, T1, T2, T3, T4, T5;
LUT6 #(.INIT(LUT[63:0])) fpga_lut_0 (.O(T0),
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]), .I4(A[4]), .I5(A[5]));
LUT6 #(.INIT(LUT[127:64])) fpga_lut_1 (.O(T1),
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]), .I4(A[4]), .I5(A[5]));
LUT6 #(.INIT(LUT[191:128])) fpga_lut_2 (.O(T2),
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]), .I4(A[4]), .I5(A[5]));
LUT6 #(.INIT(LUT[255:192])) fpga_lut_3 (.O(T3),
.I0(A[0]), .I1(A[1]), .I2(A[2]),
.I3(A[3]), .I4(A[4]), .I5(A[5]));
MUXF7 fpga_mux_0 (.O(T4), .I0(T0), .I1(T1), .S(A[6]));
MUXF7 fpga_mux_1 (.O(T5), .I0(T2), .I1(T3), .S(A[6]));
MUXF8 fpga_mux_2 (.O(Y), .I0(T4), .I1(T5), .S(A[7]));
wire f0, f1;
\$lut #(.LUT(LUT[127: 0]), .WIDTH(7)) lut0 (.A(A[6:0]), .Y(f0));
\$lut #(.LUT(LUT[255:128]), .WIDTH(7)) lut1 (.A(A[6:0]), .Y(f1));
MUXF8 mux8(.I0(f0), .I1(f1), .S(A[7]), .O(Y));
end else
if (WIDTH == 9) begin
wire f0, f1;
\$lut #(.LUT(LUT[255: 0]), .WIDTH(8)) lut0 (.A(A[7:0]), .Y(f0));
\$lut #(.LUT(LUT[511:256]), .WIDTH(8)) lut1 (.A(A[7:0]), .Y(f1));
MUXF9 mux9(.I0(f0), .I1(f1), .S(A[8]), .O(Y));
end else begin
wire _TECHMAP_FAIL_ = 1;
end

View File

@ -49,10 +49,25 @@ struct SynthXilinxPass : public ScriptPass
log(" -top <module>\n");
log(" use the specified module as top module\n");
log("\n");
log(" -family {xcup|xcu|xc7|xc6v|xc5v|xc6s}\n");
log(" -family <family>\n");
log(" run synthesis for the specified Xilinx architecture\n");
log(" generate the synthesis netlist for the specified family.\n");
log(" default: xc7\n");
log(" supported values:\n");
log(" - xcup: Ultrascale Plus\n");
log(" - xcu: Ultrascale\n");
log(" - xc7: Series 7 (default)\n");
log(" - xc6s: Spartan 6\n");
log(" - xc6v: Virtex 6\n");
log(" - xc5v: Virtex 5 (EXPERIMENTAL)\n");
log(" - xc4v: Virtex 4 (EXPERIMENTAL)\n");
log(" - xc3sda: Spartan 3A DSP (EXPERIMENTAL)\n");
log(" - xc3sa: Spartan 3A (EXPERIMENTAL)\n");
log(" - xc3se: Spartan 3E (EXPERIMENTAL)\n");
log(" - xc3s: Spartan 3 (EXPERIMENTAL)\n");
log(" - xc2vp: Virtex 2 Pro (EXPERIMENTAL)\n");
log(" - xc2v: Virtex 2 (EXPERIMENTAL)\n");
log(" - xcve: Virtex E, Spartan 2E (EXPERIMENTAL)\n");
log(" - xcv: Virtex, Spartan 2 (EXPERIMENTAL)\n");
log("\n");
log(" -edif <file>\n");
log(" write the design to the specified edif file. writing of an output file\n");
@ -82,10 +97,10 @@ struct SynthXilinxPass : public ScriptPass
log(" do not use XORCY/MUXCY/CARRY4 cells in output netlist\n");
log("\n");
log(" -nowidelut\n");
log(" do not use MUXF[78] resources to implement LUTs larger than LUT6s\n");
log(" do not use MUXF[5-9] resources to implement LUTs larger than native for the target\n");
log("\n");
log(" -nodsp\n");
log(" do not use DSP48E1s to implement multipliers and associated logic\n");
log(" do not use DSP48*s to implement multipliers and associated logic\n");
log("\n");
log(" -noiopad\n");
log(" disable I/O buffer insertion (useful for hierarchical or \n");
@ -131,6 +146,8 @@ struct SynthXilinxPass : public ScriptPass
bool abc9, dff_mode;
bool flatten_before_abc;
int widemux;
int lut_size;
int widelut_size;
void clear_flags() YS_OVERRIDE
{
@ -156,6 +173,7 @@ struct SynthXilinxPass : public ScriptPass
dff_mode = false;
flatten_before_abc = false;
widemux = 0;
lut_size = 6;
}
void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
@ -270,9 +288,39 @@ struct SynthXilinxPass : public ScriptPass
}
extra_args(args, argidx, design);
if (family != "xcup" && family != "xcu" && family != "xc7" && family != "xc6v" && family != "xc5v" && family != "xc6s")
if (family == "xcup" || family == "xcu") {
lut_size = 6;
widelut_size = 9;
} else if (family == "xc7" ||
family == "xc6v" ||
family == "xc5v" ||
family == "xc6s") {
lut_size = 6;
widelut_size = 8;
} else if (family == "xc4v" ||
family == "xc3sda" ||
family == "xc3sa" ||
family == "xc3se" ||
family == "xc3s" ||
family == "xc2vp" ||
family == "xc2v") {
lut_size = 4;
widelut_size = 8;
} else if (family == "xcve" || family == "xcv") {
lut_size = 4;
widelut_size = 6;
} else
log_cmd_error("Invalid Xilinx -family setting: '%s'.\n", family.c_str());
if (widemux != 0 && lut_size != 6)
log_cmd_error("-widemux is not currently supported for LUT4-based architectures.\n");
if (lut_size != 6) {
log_warning("Shift register inference not yet supported for family %s.\n", family.c_str());
nosrl = true;
nolutram = true;
}
if (widemux != 0 && widemux < 2)
log_cmd_error("-widemux value must be 0 or >= 2.\n");
@ -292,6 +340,9 @@ struct SynthXilinxPass : public ScriptPass
void script() YS_OVERRIDE
{
std::string lut_size_s = std::to_string(lut_size);
if (help_mode)
lut_size_s = "[46]";
std::string ff_map_file;
if (help_mode)
ff_map_file = "+/xilinx/{family}_ff_map.v";
@ -344,7 +395,7 @@ struct SynthXilinxPass : public ScriptPass
run("clean", " (skip if '-nosrl' and '-widemux=0')");
}
run("techmap -map +/cmp2lut.v -D LUT_WIDTH=6");
run("techmap -map +/cmp2lut.v -D LUT_WIDTH=" + lut_size_s);
}
if (check_label("map_dsp", "(skip if '-nodsp')")) {
@ -353,7 +404,7 @@ struct SynthXilinxPass : public ScriptPass
// NB: Xilinx multipliers are signed only
if (help_mode)
run("techmap -map +/mul2dsp.v -map +/xilinx/{family}_dsp_map.v {options}");
else if (family == "xc2v" || family == "xc3s" || family == "xc3se" || family == "xc3sa")
else if (family == "xc2v" || family == "xc2vp" || family == "xc3s" || family == "xc3se" || family == "xc3sa")
run("techmap -map +/mul2dsp.v -map +/xilinx/xc3s_mult_map.v -D DSP_A_MAXWIDTH=18 -D DSP_B_MAXWIDTH=18 "
"-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers
"-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller
@ -523,14 +574,12 @@ struct SynthXilinxPass : public ScriptPass
if (!nosrl || help_mode)
run("xilinx_srl -variable -minlen 3", "(skip if '-nosrl')");
std::string techmap_args = " -map +/techmap.v";
std::string techmap_args = " -map +/techmap.v -D LUT_SIZE=" + lut_size_s;
if (help_mode)
techmap_args += " [-map +/xilinx/mux_map.v]";
else if (widemux > 0)
techmap_args += stringf(" -D MIN_MUX_INPUTS=%d -map +/xilinx/mux_map.v", widemux);
if (help_mode)
techmap_args += " [-map +/xilinx/arith_map.v]";
else if (!nocarry) {
if (!nocarry) {
techmap_args += " -map +/xilinx/arith_map.v";
if (vpr)
techmap_args += " -D _EXPLICIT_CARRY";
@ -563,6 +612,8 @@ struct SynthXilinxPass : public ScriptPass
if (help_mode)
run("abc -luts 2:2,3,6:5[,10,20] [-dff] [-D 1]", "(option for 'nowidelut', '-dff', '-retime')");
else if (abc9) {
if (lut_size != 6)
log_error("'synth_xilinx -abc9' not currently supported for LUT4-based devices.\n");
if (family != "xc7")
log_warning("'synth_xilinx -abc9' not currently supported for the '%s' family, "
"will use timing for 'xc7' instead.\n", family.c_str());
@ -588,10 +639,19 @@ struct SynthXilinxPass : public ScriptPass
}
else {
std::string abc_opts;
if (nowidelut)
abc_opts += " -luts 2:2,3,6:5";
else
abc_opts += " -luts 2:2,3,6:5,10,20";
if (lut_size != 6) {
if (nowidelut)
abc_opts += " -lut " + lut_size_s;
else
abc_opts += " -lut " + lut_size_s + ":" + std::to_string(widelut_size);
} else {
if (nowidelut)
abc_opts += " -luts 2:2,3,6:5";
else if (widelut_size == 8)
abc_opts += " -luts 2:2,3,6:5,10,20";
else
abc_opts += " -luts 2:2,3,6:5,10,20,40";
}
if (dff_mode)
abc_opts += " -dff";
if (retime)
@ -607,8 +667,14 @@ struct SynthXilinxPass : public ScriptPass
std::string techmap_args = "-map +/xilinx/lut_map.v -map +/xilinx/cells_map.v";
if (help_mode || !abc9)
techmap_args += stringf(" -map %s", ff_map_file.c_str());
techmap_args += " -D LUT_WIDTH=" + lut_size_s;
run("techmap " + techmap_args);
run("xilinx_dffopt");
if (help_mode)
run("xilinx_dffopt [-lut4]");
else if (lut_size == 4)
run("xilinx_dffopt -lut4");
else
run("xilinx_dffopt");
run("opt_lut_ins -tech xilinx");
}

View File

@ -1,11 +1,23 @@
read_verilog ../common/add_sub.v
hierarchy -top top
proc
design -save orig
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -noiopad # equivalency check
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd top # Constrain all select calls below inside the top module
stat
select -assert-count 16 t:LUT2
select -assert-count 8 t:LUT2
select -assert-count 2 t:CARRY4
select -assert-none t:LUT2 t:CARRY4 %% t:* %D
design -load orig
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc3s -noiopad # equivalency check
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd top # Constrain all select calls below inside the top module
stat
select -assert-count 8 t:LUT2
select -assert-count 6 t:MUXCY
select -assert-count 8 t:XORCY
select -assert-none t:LUT2 t:MUXCY t:XORCY %% t:* %D

View File

@ -3,6 +3,8 @@ hierarchy -top fsm
proc
flatten
design -save orig
equiv_opt -run :prove -map +/xilinx/cells_sim.v synth_xilinx -noiopad
miter -equiv -make_assert -flatten gold gate miter
sat -verify -prove-asserts -show-public -set-at 1 in_reset 1 -seq 20 -prove-skip 1 miter
@ -17,3 +19,20 @@ select -assert-count 1 t:LUT2
select -assert-count 3 t:LUT5
select -assert-count 1 t:LUT6
select -assert-none t:BUFG t:FDRE t:FDSE t:LUT2 t:LUT5 t:LUT6 %% t:* %D
design -load orig
equiv_opt -run :prove -map +/xilinx/cells_sim.v synth_xilinx -family xc3se -noiopad
miter -equiv -make_assert -flatten gold gate miter
sat -verify -prove-asserts -show-public -set-at 1 in_reset 1 -seq 20 -prove-skip 1 miter
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd fsm # Constrain all select calls below inside the top module
stat
select -assert-count 1 t:BUFG
select -assert-count 6 t:FDRE
select -assert-count 1 t:LUT1
select -assert-count 3 t:LUT3
select -assert-count 6 t:LUT4
select -assert-count 6 t:MUXF5
select -assert-none t:BUFG t:FDRE t:LUT1 t:LUT3 t:LUT4 t:MUXF5 %% t:* %D

View File

@ -0,0 +1,51 @@
read_verilog ../common/mux.v
design -save read
hierarchy -top mux2
proc
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc3se -noiopad # equivalency check
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd mux2 # Constrain all select calls below inside the top module
select -assert-count 1 t:LUT3
select -assert-none t:LUT3 %% t:* %D
design -load read
hierarchy -top mux4
proc
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc3se -noiopad # equivalency check
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd mux4 # Constrain all select calls below inside the top module
select -assert-count 4 t:LUT1
select -assert-count 2 t:MUXF5
select -assert-count 1 t:MUXF6
select -assert-none t:LUT1 t:MUXF5 t:MUXF6 %% t:* %D
design -load read
hierarchy -top mux8
proc
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc3se -noiopad # equivalency check
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd mux8 # Constrain all select calls below inside the top module
select -assert-count 4 t:LUT1
select -assert-count 3 t:LUT4
select -assert-count 2 t:MUXF5
select -assert-count 1 t:MUXF6
select -assert-none t:LUT1 t:LUT4 t:MUXF5 t:MUXF6 %% t:* %D
design -load read
hierarchy -top mux16
proc
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc3se -noiopad # equivalency check
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd mux16 # Constrain all select calls below inside the top module
select -assert-max 32 t:LUT*
select -assert-max 8 t:MUXF6
select -assert-max 4 t:MUXF7
select -assert-none t:LUT* t:MUXF5 t:MUXF6 t:MUXF7 %% t:* %D