diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc
index c2257b720..884e12522 100644
--- a/passes/pmgen/Makefile.inc
+++ b/passes/pmgen/Makefile.inc
@@ -44,6 +44,7 @@ $(eval $(call add_extra_objs,passes/pmgen/peepopt_pm.h))
 
 PEEPOPT_PATTERN  = passes/pmgen/peepopt_shiftmul_right.pmg
 PEEPOPT_PATTERN += passes/pmgen/peepopt_shiftmul_left.pmg
+PEEPOPT_PATTERN += passes/pmgen/peepopt_shiftadd.pmg
 PEEPOPT_PATTERN += passes/pmgen/peepopt_muldiv.pmg
 
 passes/pmgen/peepopt_pm.h: passes/pmgen/pmgen.py $(PEEPOPT_PATTERN)
diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc
index aef464d79..edd3b18a8 100644
--- a/passes/pmgen/peepopt.cc
+++ b/passes/pmgen/peepopt.cc
@@ -48,6 +48,9 @@ struct PeepoptPass : public Pass {
 		log("                     Analogously, replace A<<(B*C) with appropriate selection of\n");
 		log("                     output bits from A<<(B<<K).\n");
 		log("\n");
+		log("   * shiftadd - Replace A>>(B+D) with (A'>>D)>>(B) where D is constant and\n");
+		log("                A' is derived from A by padding or cutting inaccessible bits.\n");
+		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) override
 	{
@@ -72,6 +75,7 @@ struct PeepoptPass : public Pass {
 
 				pm.setup(module->selected_cells());
 
+				pm.run_shiftadd();
 				pm.run_shiftmul_right();
 				pm.run_shiftmul_left();
 				pm.run_muldiv();
diff --git a/passes/pmgen/peepopt_shiftadd.pmg b/passes/pmgen/peepopt_shiftadd.pmg
new file mode 100644
index 000000000..f9c930eae
--- /dev/null
+++ b/passes/pmgen/peepopt_shiftadd.pmg
@@ -0,0 +1,131 @@
+pattern shiftadd
+//
+// Transforms add/sub+shift pairs that result from expressions such as data[s*W +C +:W2]
+// specifically something like: out[W2-1:0] = data >> (s*W +C)
+// will be transformed into:    out[W2-1:0] = (data >> C) >> (s*W)
+// this can then be optimized using peepopt_shiftmul_right.pmg
+//
+
+match shift
+	select shift->type.in($shift, $shiftx, $shr)
+	filter !port(shift, \B).empty()
+endmatch
+
+// the right shift amount
+state <SigSpec> shift_amount
+// log2 scale factor in interpreting of shift_amount
+// due to zero padding on the shift cell's B port
+state <int> log2scale
+// zeros at the MSB position make it unsigned
+state <bool> msb_zeros
+
+code shift_amount log2scale msb_zeros
+	shift_amount = port(shift, \B);
+
+	// strip constant-zero LSBs and count them: they scale the remaining bits by 2^log2scale
+	log2scale = 0;
+	while (shift_amount[0] == State::S0) {
+		shift_amount.remove(0);
+		if (shift_amount.empty()) reject;
+		log2scale++;
+	}
+
+	// strip constant-zero MSBs and remember that at least one was present
+	msb_zeros = false;
+	while (shift_amount.bits().back() == State::S0) {
+		msb_zeros = true;
+		shift_amount.remove(GetSize(shift_amount) - 1);
+		if (shift_amount.empty()) reject;
+	}
+endcode
+
+state <bool> var_signed
+state <SigSpec> var_signal
+// offset: signed constant value c in data[var+c +:W1] (constant shift-right amount)
+state <int> offset
+
+match add
+	// either data[var+c +:W1] or data[var-c +:W1]
+	select add->type.in($add, $sub)
+	index <SigSpec> port(add, \Y) === shift_amount
+
+	// one must be constant, the other is variable
+	choice <IdString> constport {\A, \B}
+	select !port(add, constport).empty()
+	select port(add, constport).is_fully_const()
+	define <IdString> varport (constport == \A ? \B : \A)
+
+	// if a value of var is able to wrap the output, the transformation might give wrong results
+	// an addition/subtraction can at most flip one more bit than the largest operand (the carry bit)
+	// as long as the output can show this bit, no wrap should occur (assuming all signed-ness make sense)
+	select ( GetSize(port(add, \Y)) > max(GetSize(port(add, \A)), GetSize(port(add, \B))) )
+
+	define <bool> varport_A (varport == \A)
+	define <bool> is_sub add->type.in($sub)
+
+	define <bool> constport_signed param(add, !varport_A ? \A_SIGNED : \B_SIGNED).as_bool()
+	define <bool> varport_signed param(add, varport_A ? \A_SIGNED : \B_SIGNED).as_bool()
+	// the MSB of the constant is a sign bit only if the constant port is signed;
+	// this must agree with as_int(constport_signed) used for 'offset' below
+	define <bool> const_negative (constport_signed && (port(add, constport).bits().back() == State::S1))
+	define <bool> offset_negative (const_negative ^ (is_sub && varport_A))
+
+	// checking some value boundaries as well:
+	// data[...-c +:W1] is fine for +/-var (pad at LSB, all data still accessible)
+	// data[...+c +:W1] is only fine for +var(add) and var unsigned
+	// (+c cuts lower C bits, making them inaccessible, a signed var could try to access them)
+	// either it's an add or the variable port is A (it must be positive)
+	select (add->type.in($add) || varport == \A)
+
+	// -> data[var+c +:W1] (with var signed) is illegal
+	filter !(!offset_negative && varport_signed)
+
+	// state-variables are assigned at the end only:
+	// shift the log2scale offset in-front of add to get true value: (var+c)<<N -> (var<<N)+(c<<N)
+	set offset ( (port(add, constport).as_int(constport_signed) << log2scale) * ( (is_sub && varport_A) ? -1 : 1 ) )
+
+	set var_signed varport_signed
+	set var_signal add->getPort(varport)
+endmatch
+
+code
+{
+	// positive constant offset with a signed variable (index) cannot be handled
+	// the above filter should get rid of this case but 'offset' is calculated differently
+	// due to limitations of state-variables in pmgen
+	// it should only differ if previous passes create invalid data
+	log_assert(!(offset>0 && var_signed));
+
+	SigSpec old_a = port(shift, \A), new_a;
+
+	// a constant offset reaching past the data leaves no bits for extract_end() to return;
+	// leave such cells untouched (checked before did_something is set)
+	if (offset >= GetSize(old_a)) reject;
+
+	did_something = true;
+	log("shiftadd pattern in %s: shift=%s, add/sub=%s, offset: %d\n", \
+			log_id(module), log_id(shift), log_id(add), offset);
+
+	if(offset<0) {
+		// data >> (...-c) transformed to {data, c'X} >> (...)
+		SigSpec padding( (shift->type.in($shiftx) ? State::Sx : State::S0), -offset );
+		new_a.append(padding);
+		new_a.append(old_a);
+	} else {
+		// data >> (...+c) transformed to data[MAX:c] >> (...)
+		new_a.append(old_a.extract_end(offset));
+	}
+
+	SigSpec new_b = {var_signal, SigSpec(State::S0, log2scale)};
+	// put a zero MSB back if zeros were stripped from B or var is unsigned (see msb_zeros)
+	if (msb_zeros || !var_signed)
+		new_b.append(State::S0);
+
+	shift->setPort(\A, new_a);
+	shift->setParam(\A_WIDTH, GetSize(new_a));
+	shift->setPort(\B, new_b);
+	shift->setParam(\B_WIDTH, GetSize(new_b));
+	blacklist(add);
+	accept;
+}
+endcode
diff --git a/tests/simple/partsel.v b/tests/simple/partsel.v
index 5e9730d6b..722ed7b1b 100644
--- a/tests/simple/partsel.v
+++ b/tests/simple/partsel.v
@@ -110,3 +110,42 @@ module partsel_test007 (
 		dout[n+1] = din[n];
 	end
 endmodule
+
+
+module partsel_test008 (
+	input [127:0] din,
+	input [3:0] idx,
+	input [4:0] uoffset,
+	input signed [4:0] soffset,
+	output [ 7:0] dout0,
+	output [ 7:0] dout1,
+	output [ 7:0] dout2,
+	output [ 7:0] dout3,
+	output [ 3:0] dout4,
+	output [ 3:0] dout5,
+	output [ 3:0] dout6,
+	output [ 3:0] dout7,
+	output [11:0] dout8,
+	output [11:0] dout9,
+	output [11:0] dout10,
+	output [11:0] dout11
+);
+
+// common: block-select with offsets
+assign dout0 = din[idx*8 +uoffset +:8];
+assign dout1 = din[idx*8 -uoffset +:8];
+assign dout2 = din[idx*8 +soffset +:8];
+assign dout3 = din[idx*8 -soffset +:8];
+
+// only partial block used
+assign dout4 = din[idx*8 +uoffset +:4];
+assign dout5 = din[idx*8 -uoffset +:4];
+assign dout6 = din[idx*8 +soffset +:4];
+assign dout7 = din[idx*8 -soffset +:4];
+
+// uncommon: more than one block used
+assign dout8 = din[idx*8 +uoffset +:12];
+assign dout9 = din[idx*8 -uoffset +:12];
+assign dout10 = din[idx*8 +soffset +:12];
+assign dout11 = din[idx*8 -soffset +:12];
+endmodule