Improved SRAM output multiplexer, using NAND/NOR.
* New: In cumulus/plugins/sram_256x32.py, build the output mux using a NAND2/NOR2 binary tree instead of mux2/mux3. Uses more, but much smaller, cells. The reduction of wirelength (from Yosys) goes from 4% to 15% for the non-folded variant. Uses a specially placed tree to minimize wire length. * New: In cumulus/plugins/sram.py, extend StdCellConf to convert names across library flavors (FlexLib_TSMC_C180, FlexLib_Sky130 and generic SxLib).
This commit is contained in:
parent
d294a770c4
commit
9594476ab6
|
@ -26,8 +26,8 @@ from helpers.io import ErrorMessage, WarningMessage
|
|||
from helpers.overlay import UpdateSession
|
||||
from helpers import trace, l, u, n
|
||||
import plugins
|
||||
from Hurricane import Breakpoint, DbU, Box, Net, Cell, Instance, \
|
||||
Transformation, PythonAttributes
|
||||
from Hurricane import DataBase, Breakpoint, DbU, Box, Net, Cell, \
|
||||
Instance, Transformation, PythonAttributes
|
||||
import CRL
|
||||
from Foehn import FoehnEngine, DagExtension
|
||||
from plugins.chip.configuration import GaugeConf
|
||||
|
@ -49,6 +49,12 @@ class StdCellConf ( object ):
|
|||
reDataIn = re.compile( r'^i[0-9]?' )
|
||||
reDataOut = re.compile( r'^n?q' )
|
||||
|
||||
def __init__ ( self ):
|
||||
self.techName = DataBase.getDB().getTechnology().getName()
|
||||
|
||||
def __repr__ ( self ):
|
||||
return '<StdCellConf "{}">'.format( self.techName )
|
||||
|
||||
def isRegister ( self, cell ):
|
||||
"""Returns True if the cell is a register."""
|
||||
m = StdCellConf.reDFF.match( cell.getName() )
|
||||
|
@ -85,6 +91,16 @@ class StdCellConf ( object ):
|
|||
"""Returns True if the net is a data flow (i.e. not a control)."""
|
||||
return self.isDataIn(net) or self.isDataOut(net)
|
||||
|
||||
def getStdCellName ( self, name ):
|
||||
if self.techName == 'Sky130':
|
||||
if name == 'na2_x1': name = 'nand2_x0'
|
||||
if name == 'no2_x1': name = 'nor2_x0'
|
||||
if name == 'no3_x1': name = 'nor3_x0'
|
||||
return name
|
||||
|
||||
def getStdCell ( self, name ):
|
||||
return af.getCell( self.getStdCellName(name), CRL.Catalog.State.Views )
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# Class : Bus.
|
||||
|
@ -186,6 +202,9 @@ class Column ( object ):
|
|||
Connect a bus to the column. ``busName`` is the name of the master net
|
||||
in the reference cell of the column.
|
||||
"""
|
||||
if not busName in self.busPlugs:
|
||||
raise ErrorMessage( 1, 'Column.setBusNet(): {} has no bus named "{}".' \
|
||||
.format( self.tag, busName ))
|
||||
busPlug = self.busPlugs[ busName ]
|
||||
if busPlug[0].getNet() and busPlug[0].getNet() != busNet[0]:
|
||||
print( Warning( 'Column.setBusNet(): Overrode {} {} -> {} with {}' \
|
||||
|
@ -318,11 +337,12 @@ class ColGroup ( object ):
|
|||
Initialize an *empty* column group. Sub-group or columns must be
|
||||
added afterwards.
|
||||
"""
|
||||
self.tag = tag
|
||||
self.parent = None
|
||||
self.order = None
|
||||
self.depth = 0
|
||||
self.childs = []
|
||||
self.tag = tag
|
||||
self.parent = None
|
||||
self.order = None
|
||||
self.depth = 0
|
||||
self.childs = []
|
||||
self.isReversed = False
|
||||
|
||||
def __iter__ ( self ):
|
||||
return ColGroupIterator( self )
|
||||
|
@ -358,11 +378,25 @@ class ColGroup ( object ):
|
|||
busWidth = max( busWidth, child.busWidth )
|
||||
return busWidth
|
||||
|
||||
def group ( self, child ):
|
||||
def group ( self, newChild, after=None, before=None ):
|
||||
""" Add a new child to the group. """
|
||||
self.childs.append( child )
|
||||
child.parent = self
|
||||
self.depth = max( self.depth, child.depth+1 )
|
||||
inserted = False
|
||||
if after is not None:
|
||||
for i in range(len(self.childs)):
|
||||
if self.childs[i] == after:
|
||||
self.childs.insert( i+1, newChild )
|
||||
inserted = True
|
||||
break
|
||||
if before is not None:
|
||||
for i in range(len(self.childs)):
|
||||
if self.childs[i] == before:
|
||||
self.childs.insert( i, newChild )
|
||||
inserted = True
|
||||
break
|
||||
if not inserted:
|
||||
self.childs.append( newChild )
|
||||
newChild.parent = self
|
||||
self.depth = max( self.depth, newChild.depth+1 )
|
||||
|
||||
def unGroup ( self, child=None ):
|
||||
""" Remove a child from the group (the child is *not* deleted). """
|
||||
|
@ -403,6 +437,7 @@ class ColGroup ( object ):
|
|||
for child in self.childs:
|
||||
child.reverse()
|
||||
self.childs.reverse()
|
||||
self.isReversed = not self.isReversed
|
||||
|
||||
def place ( self ):
|
||||
""" Place children/columns from left to right. """
|
||||
|
@ -572,7 +607,7 @@ class FoldState ( object ):
|
|||
else:
|
||||
self.direction = BaseSRAM.TO_RIGHT
|
||||
self.x = self.xmin
|
||||
self.irow += self.sram.rootGroup.busWidth + 1
|
||||
self.irow += self.sram.rootGroup.busWidth + 2
|
||||
self.fold += 1
|
||||
|
||||
def addWidth ( self, width ):
|
||||
|
@ -622,12 +657,16 @@ class BaseSRAM ( object ):
|
|||
The overall relative placement is organized as follow : ::
|
||||
|
||||
+---------+-------------------------------------------+
|
||||
| | headers[1] (1 row) |
|
||||
| | headers[4] (1 row) |
|
||||
| +-------------------------------------------+
|
||||
| | headers[3] (1 row) |
|
||||
| +-------------------------------------------+
|
||||
| | |
|
||||
| | Column area, fold 1 (N rows) |
|
||||
| | |
|
||||
| decoder +-------------------------------------------+
|
||||
| | headers[1] (1 row) |
|
||||
| +-------------------------------------------+
|
||||
| | headers[0] (1 row) |
|
||||
| +-------------------------------------------+
|
||||
| | |
|
||||
|
@ -646,7 +685,7 @@ class BaseSRAM ( object ):
|
|||
self.busses = {}
|
||||
self.decoder = None
|
||||
self.toHeaders = []
|
||||
self.headers = [ HeaderRow( self ) for row in range(fold) ]
|
||||
self.headers = [ HeaderRow( self ) for row in range(fold*2) ]
|
||||
|
||||
@property
|
||||
def fold ( self ):
|
||||
|
@ -656,6 +695,12 @@ class BaseSRAM ( object ):
|
|||
if column.tag in self.foldTags:
|
||||
self.foldState.forceFold()
|
||||
|
||||
def getBus ( self, fmt ):
|
||||
""" Find a bus by its formatting string. """
|
||||
if fmt in self.busses:
|
||||
return self.busses[ fmt ]
|
||||
return None
|
||||
|
||||
def getNet ( self, name, create=True ):
|
||||
"""
|
||||
Find a Net by name. If it doesn't exist and ``create`` is set to ``True``,
|
||||
|
@ -694,7 +739,9 @@ class BaseSRAM ( object ):
|
|||
, 'q' : 'net_output_X' } )
|
||||
"""
|
||||
masterCell = af.getCell( masterName, CRL.Catalog.State.Views )
|
||||
inst = Instance.create( self.cell, instName, masterCell )
|
||||
if not masterCell:
|
||||
raise ErrorMessage( 1, 'BaseSRAM.addInstance(): Cannot find cell "{}".'.format( masterName ))
|
||||
inst = Instance.create( self.cell, instName, masterCell )
|
||||
for masterNetName, netName in netMapNames.items():
|
||||
masterNet = masterCell.getNet( masterNetName )
|
||||
net = self.getNet( netName )
|
||||
|
@ -796,18 +843,18 @@ class BaseSRAM ( object ):
|
|||
bb = Box()
|
||||
bb.merge( self.decoder.place( 0 ) )
|
||||
bb.merge( self.rootGroup.place() )
|
||||
for inst, refInst in self.toHeaders:
|
||||
self.headers[ refInst.fold ].addInstanceAt( inst, refInst )
|
||||
for inst, refInst, headerRow in self.toHeaders:
|
||||
self.headers[ refInst.fold*2 + headerRow ].addInstanceAt( inst, refInst )
|
||||
for i in range(len(self.headers)):
|
||||
trace( 610, ',+', 'Place row header {} {}\n'.format( i, self.headers[i].row ))
|
||||
if i % 2:
|
||||
if i//2 % 2:
|
||||
xstart = bb.getXMax()
|
||||
direction = BaseSRAM.TO_LEFT
|
||||
else:
|
||||
xstart = self.decoder.width
|
||||
direction = BaseSRAM.TO_RIGHT
|
||||
bb.merge( self.headers[i].place( xstart
|
||||
, self.rootGroup.busWidth*(i + 1) + i
|
||||
, self.rootGroup.busWidth*(i//2 + 1) + i
|
||||
, direction ))
|
||||
trace( 610, '-,' )
|
||||
self.cell.setAbutmentBox( bb )
|
||||
|
|
|
@ -73,31 +73,50 @@ Provisional results
|
|||
|
||||
.. note:: All lengths are in micrometers.
|
||||
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| Kind | Generator | Yosys |
|
||||
+==============+=============================+=============================+
|
||||
| # Gates | 23209 (-25.4%) | 32121 |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| 1 Fold |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| Area | 7182 x 330 (-5.5%) | 7380 x 340 |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| Wirelength | 1841036 (-4.3%) | 1924153 |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| 2 Fold |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| Area | 3599 x 660 (-5.3%) | 3690 x 680 |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| Wirelength | 1670455 (-6.3%) | 1782558 |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| 4 Fold |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| Area | 1812 x 1320 (-4.6%) | 1900 x 1320 |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
| Wirelength | 1699810 (-1.5%) | 1726436 |
|
||||
+--------------+-----------------------------+-----------------------------+
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| Arch | Kind | Generator | Yosys |
|
||||
+========+==============+=============================+=============================+
|
||||
| Mux | # Gates | 23209 (-25.4%) | 32121 |
|
||||
+--------+--------------+-----------------------------+ |
|
||||
| Nao | # Gates | 34637 (+7.8%) | |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| 1 Fold |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| | Area | 7182 x 330 (-5.5%) | 7380 x 340 |
|
||||
| Mux +--------------+-----------------------------+-----------------------------+
|
||||
| | Wirelength | 1841036 (-4.3%) | 1924153 |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| | Area | 6680 x 340 (-9.5%) | |
|
||||
| Nao +--------------+-----------------------------+ |
|
||||
| | Wirelength | 1637781 (-14.9%) | |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| 2 Fold |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| | Area | 3599 x 660 (-5.3%) | 3690 x 680 |
|
||||
| Mux +--------------+-----------------------------+-----------------------------+
|
||||
| | Wirelength | 1670455 (-6.3%) | 1782558 |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| | Area | 3350 x 680 (-9.2%) | |
|
||||
| Nao +--------------+-----------------------------+ |
|
||||
| | Wirelength | 1548358 (-13.1%) | |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| 4 Fold |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| | Area | 1812 x 1320 (-4.6%) | 1900 x 1320 |
|
||||
| Mux +--------------+-----------------------------+-----------------------------+
|
||||
| | Wirelength | 1699810 (-1.5%) | 1726436 |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
| | Area | 1692 x 1360 (-8.2%) | |
|
||||
| Nao +--------------+-----------------------------+ |
|
||||
| | Wirelength | 1512107 (-12.4%) | |
|
||||
+--------+--------------+-----------------------------+-----------------------------+
|
||||
|
||||
Conclusions that we can draw from those results are :
|
||||
|
||||
The difference between the two implementations resides only in the *output*
|
||||
multiplexer: either a 4-input mux made of mux2+mux3, or a 2-input multiplexer
|
||||
made of alternate layers of nand2+nor2.
|
||||
|
||||
Conclusions for the mux2+mux3 implementation :
|
||||
|
||||
1. The generator version uses substantially fewer gates than the Yosys one.
|
||||
As both SRAMs use the exact same number of SFFs, the difference is
|
||||
|
@ -111,26 +130,42 @@ Conclusions that we can draw from those results are :
|
|||
|
||||
In particular, to build the output multiplexer we only have mx2 and
|
||||
mx3 cells, which are large. The density of the SRAM could be much
|
||||
increased if we did have nmx2 and nmx3. We could also try to synthesise
|
||||
the tree using nandX and norX but we are short of time.
|
||||
increased if we did have nmx2 and nmx3.
|
||||
|
||||
Furthermore for the output multiplexers, as it is a controlled case,
|
||||
we may also use three-state driver cells (which have not been
|
||||
ported either).
|
||||
|
||||
Conclusion for the nand2+nor2 implementation:
|
||||
|
||||
1. The multiplexer allows for a much more compact area and a noticeably
|
||||
shorter wire length, at the cost of an increased number of cells (not an issue).
|
||||
|
||||
2. The total wire length is extremely sensitive to the placement, which
|
||||
in our case is just a column ordering. To optimize, the binary tree
|
||||
(for the netlist) is not placed fully symmetrically but slightly
|
||||
"askew".
|
||||
|
||||
|
||||
.. note:: Cell width in the SkyWater 130 port of FlexLib:
|
||||
|
||||
============== =====
|
||||
Cell Width
|
||||
============== =====
|
||||
inv_x2 2
|
||||
mx2_x2 7
|
||||
mx3_x2 11
|
||||
a3_x2 5
|
||||
nand2_x0 2
|
||||
nand3_x0 3
|
||||
nand4_x0 4
|
||||
nor2_x0 2
|
||||
nor3_x0 3
|
||||
sff1_x4 15
|
||||
============== =====
|
||||
|
||||
Different ways of implementing the output multiplexer :
|
||||
|
||||
1. mx2_x2 + mx3_x2 = 18
|
||||
2. 9 * nand2_x0 = 18
|
||||
3. 4 * nand3_x0 + nand4_x0 = 16
|
||||
|
@ -178,15 +213,18 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
if fold == 1:
|
||||
pass
|
||||
elif fold == 2:
|
||||
self.foldTags = [ 'imux_addr0128' ]
|
||||
#self.foldTags = [ 'imux_addr0128' ]
|
||||
self.foldTags = [ 'imux_addr0192' ]
|
||||
elif fold == 4:
|
||||
self.foldTags = [ 'omux_0_to_127', 'imux_addr0128', 'imux_addr0240' ]
|
||||
#self.foldTags = [ 'imux_addr0064', 'imux_addr0128', 'imux_addr0192' ]
|
||||
#self.foldTags = [ 'omux_0_to_127', 'imux_addr0128', 'imux_addr0240' ]
|
||||
self.foldTags = [ 'imux_addr0096', 'imux_addr0192', 'imux_addr0160' ]
|
||||
else:
|
||||
raise ErrorMessage( 1, 'SRAM_256x32.__init__(): Unsupported fold {}, valid values are 1, 2, 4.'.format( fold ))
|
||||
self.cell = af.createCell( 'spram_256x32' )
|
||||
self.mx2Cell = af.getCell( 'mx2_x2', CRL.Catalog.State.Views )
|
||||
self.mx3Cell = af.getCell( 'mx3_x2', CRL.Catalog.State.Views )
|
||||
self.mx2Cell = self.confLib.getStdCell( 'mx2_x2' )
|
||||
self.mx3Cell = self.confLib.getStdCell( 'mx3_x2' )
|
||||
self.na2Cell = self.confLib.getStdCell( 'na2_x1' )
|
||||
self.no2Cell = self.confLib.getStdCell( 'no2_x1' )
|
||||
with UpdateSession():
|
||||
self.buildInterface()
|
||||
self.decoder = ColBlock( self, 'decod', 33 )
|
||||
|
@ -203,61 +241,149 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
, 'bit_addr{:04d}'.format( addr )
|
||||
, '_byte{byte}_{bbit}'
|
||||
, 32 ))
|
||||
bus = Bus( self, 'imux_addr{:04d}_b_q({{}})'.format(addr), 32 )
|
||||
bus = Bus( self, 'imux_addr{:04d}_q({{}})'.format(addr), 32 )
|
||||
bitGroup.childs[0].setBusNet( 'q', bus )
|
||||
bitGroup.childs[1].setBusNet( 'i', bus )
|
||||
bus = Bus( self, 'bit_addr{:04d}_b_q({{}})'.format(addr), 32 )
|
||||
bus = Bus( self, 'bit_addr{:04d}_q({{}})'.format(addr), 32 )
|
||||
bitGroup.childs[0].setBusNet( 'i0', bus )
|
||||
bitGroup.childs[1].setBusNet( 'q', bus )
|
||||
bus = Bus( self, 'di({})', 32 )
|
||||
bitGroup.childs[0].setBusNet( 'i1', bus )
|
||||
bitGroup.childs[1].setCmdNet( 'ck', self.getNet( 'clk' ))
|
||||
omuxGroupsCurr = []
|
||||
omuxGroupsNext = []
|
||||
muxDepth = 0
|
||||
for i in range(256//4):
|
||||
childs = []
|
||||
for addr in range(i*4, (i+1)*4):
|
||||
tag = SRAM_256x32.BIT_GROUP_FMT.format( addr )
|
||||
childs.append( self.rootGroup.findChild( tag ))
|
||||
childs[-1].unGroup()
|
||||
omuxGroupsCurr.append( self._doMux4( childs, muxDepth ))
|
||||
while len(omuxGroupsCurr) >= 4:
|
||||
trace( 610, '\tGrouping {} elements.\n'.format( len(omuxGroupsCurr )))
|
||||
muxDepth += 1
|
||||
for i in range(len(omuxGroupsCurr)//4):
|
||||
omuxGroupsNext.append( self._doMux4( omuxGroupsCurr[i*4:(i+1)*4], muxDepth ))
|
||||
omuxGroupsCurr = omuxGroupsNext
|
||||
omuxGroupsNext = []
|
||||
for group in omuxGroupsCurr:
|
||||
self.rootGroup.group( group )
|
||||
self.buildDecoder()
|
||||
#self._buildOutputMux_mx23()
|
||||
self._buildOutputMux_nao23()
|
||||
#inst = self.addInstance( 'inv_x2'
|
||||
# , 'nrst_inv'
|
||||
# , { 'i' : 'rst'
|
||||
# , 'nq' : 'n_rst'
|
||||
# }
|
||||
# )
|
||||
#self.decoder.addInstance( 0, inst )
|
||||
inst = self.addInstance( 'inv_x2'
|
||||
, 'nrst_inv'
|
||||
, { 'i' : 'rst'
|
||||
, 'nq' : 'nrst'
|
||||
, 'nce_inv'
|
||||
, { 'i' : 'ce'
|
||||
, 'nq' : 'n_ce'
|
||||
}
|
||||
)
|
||||
self.decoder.addInstance( 0, inst )
|
||||
for child in self.rootGroup.childs[0].childs:
|
||||
if child.kind == Column.KIND_COLUMN:
|
||||
if child.insts[0].getMasterCell() != self.mx3Cell:
|
||||
continue
|
||||
rstCol = Column( self
|
||||
, af.getCell( 'a2_x2', CRL.Catalog.State.Views )
|
||||
, 'omux_nrst'
|
||||
, '_byte{byte}_{bbit}'
|
||||
, 32 )
|
||||
busOMux = Bus( self, child.tag+'_b_q({})', 32 )
|
||||
busDato = Bus( self, 'dato({})', 32 )
|
||||
child .setBusNet( 'q' , busOMux )
|
||||
rstCol.setBusNet( 'i0', busOMux )
|
||||
rstCol.setCmdNet( 'i1', self.getNet('nrst') )
|
||||
rstCol.setBusNet( 'q' , busDato )
|
||||
self.rootGroup.group( rstCol )
|
||||
self.buildDecoder()
|
||||
if child.insts[0].getMasterCell() == self.mx3Cell:
|
||||
rstCol = Column( self
|
||||
, af.getCell( 'a2_x2', CRL.Catalog.State.Views )
|
||||
, 'omux_n_rst'
|
||||
, '_byte{byte}_{bbit}'
|
||||
, 32 )
|
||||
busOMux = Bus( self, child.tag+'_q({})', 32 )
|
||||
busDato = Bus( self, 'dato({})', 32 )
|
||||
child .setBusNet( 'q' , busOMux )
|
||||
rstCol.setBusNet( 'i0', busOMux )
|
||||
rstCol.setCmdNet( 'i1', self.getNet('n_rst') )
|
||||
rstCol.setBusNet( 'q' , busDato )
|
||||
self.rootGroup.group( rstCol )
|
||||
break
|
||||
omuxRoot = self.rootGroup.findChild( 'omux_0_to_255' )
|
||||
rstCol = Column( self
|
||||
, self.no2Cell
|
||||
, 'omux_rst'
|
||||
, '_byte{byte}_{bbit}'
|
||||
, 32 )
|
||||
busOMux = Bus( self, 'omux_0_to_255_nq({})', 32 )
|
||||
busDato = Bus( self, 'dato({})', 32 )
|
||||
omuxRoot.setBusNet( 'nq', busOMux )
|
||||
rstCol.setBusNet( 'i0', busOMux )
|
||||
rstCol.setCmdNet( 'i1', self.getNet('rst') )
|
||||
rstCol.setBusNet( 'nq', busDato )
|
||||
omuxRoot.parent.group( rstCol, after=omuxRoot )
|
||||
af.saveCell( self.cell, CRL.Catalog.State.Logical )
|
||||
|
||||
def _doMux4 ( self, childs, muxDepth ):
|
||||
def _buildOutputMux_nao23 ( self ):
|
||||
"""
|
||||
Build the complete output mux based on successive layers of NAND2
|
||||
then NOR2. More compact than the mux based version.
|
||||
|
||||
Use an "askew" tree to minimize wiring.
|
||||
"""
|
||||
muxDepth = 0
|
||||
levels = [ [], ]
|
||||
for addr in range(256):
|
||||
oneHotName = 'rdecod_' + self._getDecodNetName( addr, 8 ).replace('_n_','_')
|
||||
tag = SRAM_256x32.BIT_GROUP_FMT.format( addr )
|
||||
bitGroup = self.rootGroup.findChild(tag)
|
||||
bitGroup.unGroup()
|
||||
tag = 'sel_' + tag[:-2]
|
||||
nand2Col = Column( self
|
||||
, self.na2Cell
|
||||
, tag
|
||||
, '_byte{byte}_{bbit}'
|
||||
, 32 )
|
||||
nand2Col.setCmdNet( 'i0', self.getNet( oneHotName ))
|
||||
busDff = self.getBus( 'bit_addr{:04d}_q({{}})'.format(addr) )
|
||||
nand2Col.setBusNet( 'i1', busDff )
|
||||
bitGroup.group( nand2Col )
|
||||
levels[0].append( (bitGroup, nand2Col ) )
|
||||
while len(levels[muxDepth]) > 1:
|
||||
levels.append( [] )
|
||||
childIndex = 1 if muxDepth else 2
|
||||
for i in range(len( levels[muxDepth]) // 2 ):
|
||||
naoCell = self.no2Cell if muxDepth%2 else self.na2Cell
|
||||
childs = [ levels[muxDepth][i*2][0], levels[muxDepth][i*2+1][0] ]
|
||||
leftRoot = levels[muxDepth][i*2 ][1]
|
||||
rightRoot = levels[muxDepth][i*2 + 1][1]
|
||||
tags = [ childs[0].tag, childs[1].tag ]
|
||||
naoTag = SRAM_256x32._mergeOMuxTags( tags )
|
||||
naoGroup = ColGroup( naoTag+'_g' )
|
||||
trace( 610, ',+', '\tSRAM_256x32._buildOutputmux() {} + {} -> {}\n' \
|
||||
.format( tags[0], tags[1], naoTag ))
|
||||
nao2Col = Column( self
|
||||
, naoCell
|
||||
, naoTag
|
||||
, '_byte{byte}_{bbit}'
|
||||
, 32 )
|
||||
naoGroup.group( childs[0] )
|
||||
naoGroup.group( childs[1] )
|
||||
bus0 = Bus( self, tags[0][:-2]+'_nq({})', 32 )
|
||||
bus1 = Bus( self, tags[1][:-2]+'_nq({})', 32 )
|
||||
busNao = Bus( self, naoTag+'_nq({})', 32 )
|
||||
leftRoot .setBusNet( 'nq', bus0 )
|
||||
rightRoot.setBusNet( 'nq', bus1 )
|
||||
nao2Col.setBusNet( 'i0', bus0 )
|
||||
nao2Col.setBusNet( 'i1', bus1 )
|
||||
childs[1].reverse()
|
||||
rightRoot.parent.group( nao2Col, before=rightRoot )
|
||||
trace( 610, '\tInsert mux {} before {}\n'.format( nao2Col, rightRoot.parent ))
|
||||
levels[muxDepth+1].append( (naoGroup, nao2Col) )
|
||||
trace( 610, '-,' )
|
||||
muxDepth += 1
|
||||
self.rootGroup.group( levels[muxDepth][0][0] )
|
||||
|
||||
def _buildOutputMux_mx23 ( self ):
|
||||
"""
|
||||
Build the complete output mux based on successive layers of mux4,
|
||||
each mux4 being built upon a mux2 + mux3 (_doMux4_mx23).
|
||||
"""
|
||||
omuxGroupsCurr = []
|
||||
omuxGroupsNext = []
|
||||
muxDepth = 0
|
||||
for i in range(256//4):
|
||||
childs = []
|
||||
for addr in range(i*4, (i+1)*4):
|
||||
tag = SRAM_256x32.BIT_GROUP_FMT.format( addr )
|
||||
childs.append( self.rootGroup.findChild( tag ))
|
||||
childs[-1].unGroup()
|
||||
omuxGroupsCurr.append( self._doMux4_mx23( childs, muxDepth ))
|
||||
while len(omuxGroupsCurr) >= 4:
|
||||
trace( 610, '\tGrouping {} elements.\n'.format( len(omuxGroupsCurr )))
|
||||
muxDepth += 1
|
||||
for i in range(len(omuxGroupsCurr)//4):
|
||||
omuxGroupsNext.append( self._doMux4_mx23( omuxGroupsCurr[i*4:(i+1)*4], muxDepth ))
|
||||
omuxGroupsCurr = omuxGroupsNext
|
||||
omuxGroupsNext = []
|
||||
for group in omuxGroupsCurr:
|
||||
self.rootGroup.group( group )
|
||||
|
||||
def _doMux4_mx23 ( self, childs, muxDepth ):
|
||||
"""
|
||||
Build a 4 entry mux. It uses a mux2 / mux3 combination.
|
||||
Returns a newly build group.
|
||||
|
@ -276,7 +402,7 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
mux2Tag = SRAM_256x32._mergeOMuxTags( tags[0:2] )
|
||||
mux3Tag = SRAM_256x32._mergeOMuxTags( tags )
|
||||
muxGroup = ColGroup( muxTag+'_g' )
|
||||
trace( 610, ',+', '\tSRAM_256x32._doMux4() {} + {} -> {}\n' \
|
||||
trace( 610, ',+', '\tSRAM_256x32._doMux4_mx23() {} + {} -> {}\n' \
|
||||
.format( mux2Tag, mux3Tag, muxTag ))
|
||||
mux2Col = Column( self
|
||||
, self.mx2Cell
|
||||
|
@ -297,11 +423,11 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
muxGroup.group( childs[2] )
|
||||
muxGroup.group( mux3Col )
|
||||
muxGroup.group( childs[3] )
|
||||
bus0 = Bus( self, tags[0][:-2]+'_b_q({})', 32 )
|
||||
bus1 = Bus( self, tags[1][:-2]+'_b_q({})', 32 )
|
||||
bus2 = Bus( self, tags[2][:-2]+'_b_q({})', 32 )
|
||||
bus3 = Bus( self, tags[3][:-2]+'_b_q({})', 32 )
|
||||
busMx2 = Bus( self, mux2Tag+'_b_q({})', 32 )
|
||||
bus0 = Bus( self, tags[0][:-2]+'_q({})', 32 )
|
||||
bus1 = Bus( self, tags[1][:-2]+'_q({})', 32 )
|
||||
bus2 = Bus( self, tags[2][:-2]+'_q({})', 32 )
|
||||
bus3 = Bus( self, tags[3][:-2]+'_q({})', 32 )
|
||||
busMx2 = Bus( self, mux2Tag+'_q({})', 32 )
|
||||
childs[0].childs[ childIndex ].setBusNet( 'q', bus0 )
|
||||
childs[1].childs[ childIndex ].setBusNet( 'q', bus1 )
|
||||
childs[2].childs[ childIndex ].setBusNet( 'q', bus2 )
|
||||
|
@ -331,8 +457,10 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
addrs = []
|
||||
for tag in tags:
|
||||
end = -2 if tag.endswith('_g') else 0
|
||||
if tag.startswith('bit'):
|
||||
if tag.startswith('bit_addr'):
|
||||
addrs.append( int( tag[8:end] ))
|
||||
if tag.startswith('sel_bit_addr'):
|
||||
addrs.append( int( tag[12:end] ))
|
||||
elif tag.startswith('omux'):
|
||||
m = vectorRe.match( tag )
|
||||
addrs += [ int(m.group('lsb')), int(m.group('msb')) ]
|
||||
|
@ -397,14 +525,14 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
if addrWidth == 2:
|
||||
indexFirstBit = (oneHot >> addrWidth) * addrWidth
|
||||
valueAddr = oneHot % (1 << addrWidth)
|
||||
trunkName = self._getDecodNetName( oneHot, addrWidth )
|
||||
instConf.append( 'a2_x2' )
|
||||
instConf.append( 'decod_a2_{}'.format( trunkName ))
|
||||
trunkName = 'n_'+self._getDecodNetName( oneHot, addrWidth )
|
||||
instConf.append( self.confLib.getStdCellName('na2_x1') )
|
||||
instConf.append( 'decod_nand2_{}'.format( trunkName ))
|
||||
instConf.append( {} )
|
||||
for i in range(2):
|
||||
inv = '' if (valueAddr & (1 << i)) else 'n_'
|
||||
instConf[2][ 'i{}'.format(i) ] = '{}addr({})'.format( inv, indexFirstBit+i )
|
||||
instConf[2][ 'q' ] = 'decod_'+trunkName
|
||||
instConf[2][ 'nq' ] = 'decod_'+trunkName
|
||||
elif addrWidth == 4 or addrWidth == 8:
|
||||
halfWidth = addrWidth>>1
|
||||
halfMask = 0
|
||||
|
@ -413,15 +541,16 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
indexFirstBit = (oneHot >> addrWidth) * addrWidth
|
||||
valueAddr = oneHot % (1 << addrWidth)
|
||||
trunkName = self._getDecodNetName( oneHot, addrWidth )
|
||||
instConf.append( 'a2_x2' )
|
||||
instConf.append( 'decod_a2_{}'.format( trunkName ))
|
||||
gate = 'no2_x1' if addrWidth == 4 else 'na2_x1'
|
||||
instConf.append( self.confLib.getStdCellName(gate) )
|
||||
instConf.append( 'decod_{}_{}'.format( gate[:-3], trunkName ))
|
||||
instConf.append( {} )
|
||||
offset = (oneHot >> addrWidth) << (halfWidth+1)
|
||||
oneHot0 = (oneHot & halfMask) + offset
|
||||
instConf[2][ 'i0' ] = 'decod_'+self._getDecodNetName( oneHot0, halfWidth )
|
||||
instConf[2][ 'i0' ] = 'decod_n_'+self._getDecodNetName( oneHot0, halfWidth )
|
||||
oneHot1 = ((oneHot >> halfWidth) & halfMask) + (1<<(halfWidth)) + offset
|
||||
instConf[2][ 'i1' ] = 'decod_'+self._getDecodNetName( oneHot1, halfWidth )
|
||||
instConf[2][ 'q' ] = 'decod_'+trunkName
|
||||
instConf[2][ 'i1' ] = 'decod_n_'+self._getDecodNetName( oneHot1, halfWidth )
|
||||
instConf[2][ 'nq' ] = 'decod_n_'+trunkName
|
||||
trace( 610, '\t{:08b} {:3d}:{} + {:3d}:{} => {:3d}::{:08b}:{}\n' \
|
||||
.format( halfMask
|
||||
, oneHot0, self._getDecodNetName( oneHot0, halfWidth )
|
||||
|
@ -449,6 +578,14 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
)
|
||||
self.decoder.addInstance( bit * 4, inst )
|
||||
self.connect( 'raddr_sff_{}'.format(bit), 'ck', 'clk' )
|
||||
for we in range(4):
|
||||
inst = self.addInstance( 'inv_x1'
|
||||
, 'decod_n_we_{}'.format(we)
|
||||
, { 'i' : 'we({})'.format(we)
|
||||
, 'nq' : 'n_we({})'.format(we)
|
||||
}
|
||||
)
|
||||
self.decoder.addInstance( we*4 + 1, inst )
|
||||
for bit in range(8):
|
||||
inst = self.addInstance( 'inv_x1'
|
||||
, 'decod_inv_{}'.format(bit)
|
||||
|
@ -460,6 +597,7 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
for oneHot in range(16):
|
||||
trace( 610, '\t{}\n'.format( self._getDecodNetName(oneHot,2) ))
|
||||
instDatas = self._getDecodInstConf( oneHot, 2 )
|
||||
print( instDatas )
|
||||
inst = self.addInstance( instDatas[0], instDatas[1], instDatas[2] )
|
||||
self.decoder.addInstance( oneHot*2 + 1, inst )
|
||||
for oneHot in range(32):
|
||||
|
@ -473,21 +611,38 @@ class SRAM_256x32 ( BaseSRAM ):
|
|||
inst = self.addInstance( instDatas[0], instDatas[1], instDatas[2] )
|
||||
dffCol = self.rootGroup.findChild( bitTag )
|
||||
imuxCol = self.rootGroup.findChild( imuxTag )
|
||||
self.toHeaders.append(( inst, imuxCol.insts[0] ))
|
||||
self.toHeaders.append(( inst, imuxCol.insts[0], 0 ))
|
||||
for we in range(4):
|
||||
cmdNetName = 'decod_addr{:04d}_we({})'.format( oneHot, we )
|
||||
inst = self.addInstance( 'a3_x2'
|
||||
, 'decod_a3_we_{}_{}'.format(we,oneHot)
|
||||
, { 'i0' : instDatas[2]['q']
|
||||
, 'i1' : 'ce'
|
||||
, 'i2' : 'we({})'.format(we)
|
||||
, 'q' : cmdNetName
|
||||
inst = self.addInstance( self.confLib.getStdCellName('no3_x1')
|
||||
, 'decod_no3_we_{}_{}'.format(we,oneHot)
|
||||
, { 'i0' : instDatas[2]['nq']
|
||||
, 'i1' : 'n_ce'
|
||||
, 'i2' : 'n_we({})'.format(we)
|
||||
, 'nq' : cmdNetName
|
||||
}
|
||||
)
|
||||
self.toHeaders.append(( inst, dffCol.insts[0] ))
|
||||
self.toHeaders.append(( inst, imuxCol.insts[0], 0 ))
|
||||
for bit in range(8):
|
||||
self.connect( 'imux_addr{:04d}_byte{byte}_{bbit}'.format( oneHot, byte=we, bbit=bit )
|
||||
, 'cmd'
|
||||
, cmdNetName
|
||||
)
|
||||
oneHotName = instDatas[2]['nq'].replace('_n_','_')
|
||||
inst = self.addInstance( 'inv_x1'
|
||||
, 'omux_onehot_inv_{:04d}'.format(oneHot)
|
||||
, { 'i' : instDatas[2]['nq']
|
||||
, 'nq' : oneHotName
|
||||
}
|
||||
)
|
||||
self.toHeaders.append(( inst, imuxCol.insts[0], 0))
|
||||
sffName = 'omux_onehot_dff_{:04d}'.format(oneHot)
|
||||
inst = self.addInstance( 'sff1_x4'
|
||||
, sffName
|
||||
, { 'i' : oneHotName
|
||||
, 'q' : 'r'+oneHotName
|
||||
}
|
||||
)
|
||||
self.connect( sffName, 'ck', 'clk' )
|
||||
self.toHeaders.append(( inst, imuxCol.insts[0], 1 ))
|
||||
trace( 610, '-,' )
|
||||
|
|
Loading…
Reference in New Issue