diff --git a/cumulus/src/CMakeLists.txt b/cumulus/src/CMakeLists.txt index 80a46fb2..5ec7f5a2 100644 --- a/cumulus/src/CMakeLists.txt +++ b/cumulus/src/CMakeLists.txt @@ -13,8 +13,6 @@ ${CMAKE_CURRENT_SOURCE_DIR}/plugins/chiproute.py ${CMAKE_CURRENT_SOURCE_DIR}/plugins/conductor.py ${CMAKE_CURRENT_SOURCE_DIR}/plugins/matrixplacer.py - ${CMAKE_CURRENT_SOURCE_DIR}/plugins/sramplacer1.py - ${CMAKE_CURRENT_SOURCE_DIR}/plugins/sramplacer2.py ${CMAKE_CURRENT_SOURCE_DIR}/plugins/block.py ${CMAKE_CURRENT_SOURCE_DIR}/plugins/rsave.py ${CMAKE_CURRENT_SOURCE_DIR}/plugins/rsaveall.py @@ -45,6 +43,12 @@ set ( pyTools ${CMAKE_CURRENT_SOURCE_DIR}/tools/blif2vst.py ${CMAKE_CURRENT_SOURCE_DIR}/tools/yosys.py ) + set ( pyPluginSRAM ${CMAKE_CURRENT_SOURCE_DIR}/plugins/sram/__init__.py + ${CMAKE_CURRENT_SOURCE_DIR}/plugins/sram/sram.py + ${CMAKE_CURRENT_SOURCE_DIR}/plugins/sram/sram_256x32.py + ${CMAKE_CURRENT_SOURCE_DIR}/plugins/sram/sramplacer1.py + ${CMAKE_CURRENT_SOURCE_DIR}/plugins/sram/sramplacer2.py + ) set ( pyPluginAlpha ${CMAKE_CURRENT_SOURCE_DIR}/plugins/alpha/__init__.py ${CMAKE_CURRENT_SOURCE_DIR}/plugins/alpha/utils.py ) @@ -92,6 +96,7 @@ install ( FILES ${pyPluginC2C} DESTINATION ${Python_CORIOLISLIB}/cumulus/plugins/core2chip ) install ( FILES ${pyPluginC2C} DESTINATION ${Python_CORIOLISLIB}/cumulus/plugins/core2chip ) install ( FILES ${pyPluginChip} DESTINATION ${Python_CORIOLISLIB}/cumulus/plugins/chip ) + install ( FILES ${pyPluginSRAM} DESTINATION ${Python_CORIOLISLIB}/cumulus/plugins/sram ) install ( FILES ${pyPluginAlpha} DESTINATION ${Python_CORIOLISLIB}/cumulus/plugins/alpha ) install ( FILES ${pyPluginAlphaBlock} DESTINATION ${Python_CORIOLISLIB}/cumulus/plugins/alpha/block ) install ( FILES ${pyPluginAlphaC2C} DESTINATION ${Python_CORIOLISLIB}/cumulus/plugins/alpha/core2chip ) diff --git a/cumulus/src/plugins/sram/__init__.py b/cumulus/src/plugins/sram/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cumulus/src/plugins/sram/sram.py 
b/cumulus/src/plugins/sram/sram.py new file mode 100644 index 00000000..d27f7759 --- /dev/null +++ b/cumulus/src/plugins/sram/sram.py @@ -0,0 +1,819 @@ + +# This file is part of the Coriolis Software. +# Copyright (c) Sorbonne Université 2022-2022, All Rights Reserved +# +# +-----------------------------------------------------------------+ +# | C O R I O L I S | +# | C u m u l u s - P y t h o n T o o l s | +# | | +# | Author : Jean-Paul CHAPUT | +# | E-mail : Jean-Paul.Chaput@lip6.fr | +# | =============================================================== | +# | Python : "./plugins/sram.py" | +# +-----------------------------------------------------------------+ + + +""" +The ``sram`` module provide base classes for SRAM assemby. +""" + + +import sys +import re +import traceback +import helpers +from helpers.io import ErrorMessage, WarningMessage +from helpers.overlay import UpdateSession +from helpers import trace, l, u, n +import plugins +from Hurricane import Breakpoint, DbU, Box, Net, Cell, Instance, \ + Transformation, PythonAttributes +import CRL +from Foehn import FoehnEngine, DagExtension +from plugins.chip.configuration import GaugeConf + + +af = CRL.AllianceFramework.get() + + +# -------------------------------------------------------------------- +# Class : StdCellConf. + +class StdCellConf ( object ): + """ + Gives meta informations about the standard cell library. + """ + + reDFF = re.compile( r'^sff' ) + reMux = re.compile( r'^n?mx[0-9]_' ) + reDataIn = re.compile( r'^i[0-9]?' 
) + reDataOut = re.compile( r'^n?q' ) + + def isRegister ( self, cell ): + """Returns True if the cell is a register.""" + m = StdCellConf.reDFF.match( cell.getName() ) + if m: return True + return False + + def isMux ( self, cell ): + """Returns True if the cell is a multiplexer.""" + m = StdCellConf.reMux.match( cell.getName() ) + if m: return True + return False + + def isDataIn ( self, net ): + """Returns True if the net name matches the data input pattern (warns when it is not an input).""" + m = StdCellConf.reDataIn.match( net.getName() ) + if m: + if not (net.getDirection() & Net.Direction.DirIn): + print( Warning( [ 'StdCellConf.isDataIn(): Match input pattern "^i[0-9]?" but not in input direction.' + , 'On {}'.format(net) ] )) + return True + return False + + def isDataOut ( self, net ): + """Returns True if the net name matches the data output pattern (warns when it is not an output).""" + m = StdCellConf.reDataOut.match( net.getName() ) + if m: + if not (net.getDirection() & Net.Direction.DirOut): + print( Warning( [ 'StdCellConf.isDataOut(): Match output pattern "^n?q" but not in output direction.' + , 'On {}'.format(net) ] )) + return True + return False + + def isData ( self, net ): + """Returns True if the net is a data flow (i.e. not a control).""" + return self.isDataIn(net) or self.isDataOut(net) + + +# -------------------------------------------------------------------- +# Class : Bus. + +class Bus ( object ): + """ + A set of Net to be manipulated as a bus. + """ + + def __init__ ( self, sram, netFmt, bits ): + self.sram = sram + self.nets = [] + if isinstance(bits,int): + bits = range( bits ) + for bit in bits: + self.nets.append( self.sram.getNet( netFmt.format( bit ))) + self.sram.busses[ netFmt ] = self + + @property + def busWidth ( self ): + return len( self.nets ) + + def __getitem__ ( self, i ): + return self.nets[ i ] + + +# -------------------------------------------------------------------- +# Class : Column. + +class Column ( object ): + """ + Build & manage a column of identical master cells. 
+ """ + KIND_COLUMN = 0x0001 + KIND_BLOCK = 0x0002 + KIND_GROUP = 0x0004 + + def __init__ ( self, sram, masterCell, tag, bitNaming, busWidth ): + """ + Build (instantiate) a column of ``busWidth`` identical instances of ``masterCell``. + Instance names are generated by a ``ColNaming`` built from ``tag + bitNaming``. + """ + self.sram = sram + self.tag = tag + self.parent = None + self.order = None + self.bitNaming = bitNaming + self.busWidth = busWidth + self.insts = [] + self.busPlugs = {} + self.revert = False + naming = ColNaming( self.tag + self.bitNaming ) + for bit in range(busWidth): + self.insts.append( Instance.create( self.sram.cell, naming(bit), masterCell )) + for net in masterCell.getNets(): + if self.sram.confLib.isDataIn(net) or self.sram.confLib.isDataOut(net): + busPlug = [] + for inst in self.insts: + busPlug.append( inst.getPlug( net )) + self.busPlugs[ net.getName() ] = busPlug + + def __del__ ( self ): + """ Needed to disable Python attribute uses on reference instance. """ + PythonAttributes.disable( self.insts[0] ) + + @property + def kind ( self ): return Column.KIND_COLUMN + + @property + def depth ( self ): return 0 + + @property + def width ( self ): + """ Width of the Column. """ + return self.insts[0].getMasterCell().getAbutmentBox().getWidth() + + @property + def root ( self ): + """ Return the root of the tree. """ + if self.parent: + return self.parent.root + return self + + def __repr__ ( self ): + return 'Column("{}", depth={}, tag={}, busWidth={})'.format( self.insts[0].getMasterCell().getName() + , self.depth + , self.tag + , self.busWidth ) + + def findChild ( self, tag ): + """ + Terminal function for ColGroup.findChild(), returns itself if ``tag`` matches, None otherwise. + """ + if self.tag == tag: return self + return None + + def setBusNet ( self, busName, busNet ): + """ + Connect a bus to the column. ``busName`` is the name of the master net + in the reference cell of the column. 
+ """ + busPlug = self.busPlugs[ busName ] + if busPlug[0].getNet() and busPlug[0].getNet() != busNet[0]: + print( Warning( 'Column.setBusNet(): Overrode {} {} -> {} with {}' \ + .format( busPlug[0].getInstance() + , busName + , busPlug[0].getNet() + , busNet[0] ))) + for i in range(self.busWidth): + busPlug[ i ].setNet( busNet[i] ) + trace( 605, '\tsetBusNet {} {} -> {}\n'.format( busPlug[0].getInstance(), busName, busNet[0] )) + + def setCmdNet ( self, cmd, net ): + masterNet = self.insts[0].getMasterCell().getNet( cmd ) + for bus in range(self.busWidth): + self.insts[ bus ].getPlug( masterNet ).setNet( net ) + + def setCutCost ( self, order ): + self.order = order + 1 + return self.order + + def reverse ( self ): + """ reverse() toggle a MX symmetry to the column. """ + self.revert = not self.revert + + def place ( self ): + """ Perform the placement of the column at ``x``. """ + trace( 610, ',+', "\tColumn.place() tag={}\n".format( self.tag )) + bb = Box() + self.sram.doFoldAtColumn( self ) + x, irow = self.sram.foldState.getPosition() + for bit in range(self.busWidth): + if bit == 0: + PythonAttributes.enable( self.insts[0] ) + self.insts[0].fold = self.sram.foldState.fold + trace( 610, "\tx={} irow={} tag={}\n" \ + .format( DbU.getValueString(x), irow+bit, self.tag )) + self.sram.placeInstance( self.insts[ bit ], x, irow+bit, self.sram.foldState.direction, self.revert ) + bb.merge( self.insts[bit].getAbutmentBox() ) + self.sram.foldState.addWidth( self.width ) + trace( 610, '-,' ) + return bb + + def showTree ( self, depth ): + """ Terminal function for ColGroup.showTree(), display itsef. """ + print( '{}| {}'.format( ' '*depth, self )) + + + +# -------------------------------------------------------------------- +# Class : ColBlock. + +class ColBlock ( object ): + """ + Manage an column made of irregular instances, more than one instance + can be put on each slice. 
+ """ + + def __init__ ( self, sram, tag, busWidth ): + """ + Manage a column made of irregular instances, more than one instance + can be put on each slice. + """ + self.sram = sram + self.tag = tag + self.parent = None + self.busWidth = busWidth + self.width = 0 + self.widths = [] + self.rows = [] + for i in range(self.busWidth): + self.rows.append( [] ) + self.widths.append( 0 ) + + @property + def kind ( self ): return Column.KIND_BLOCK + + @property + def depth ( self ): return 0 + + @property + def root ( self ): + """ Return the root of the tree. """ + if self.parent: + return self.parent.root + return self + + def __repr__ ( self ): + return 'ColBlock(depth={}, tag={}, busWidth={})'.format( self.depth, self.tag, self.busWidth ) + + def showTree ( self, depth ): + """ Terminal function for ColGroup.showTree(), displays itself. """ + print( '{}| {}'.format( ' '*depth, self )) + + def addInstance ( self, irow, inst ): + """ + Add an instance to the column block. Try to add the instance in + rows ``[irow,irow+3]`` so the width increase will be minimized. + """ + inarrower = irow + for i in range( irow+1, min( irow+4, len(self.rows))): + if self.widths[ i ] < self.widths[ inarrower ]: + inarrower = i + self.rows [ inarrower ].append( inst ) + self.widths[ inarrower ] += inst.getMasterCell().getAbutmentBox().getWidth() + if self.widths[ inarrower ] > self.width: + self.width = self.widths[ inarrower ] + + def place ( self, x ): + """ Perform the placement of the column block at ``x``. """ + bb = Box() + for irow in range(self.busWidth): + xrow = x + for inst in self.rows[irow]: + self.sram.placeInstance( inst, xrow, irow ) + xrow += inst.getMasterCell().getAbutmentBox().getWidth() + bb.merge( inst.getAbutmentBox() ) + return bb + + +# -------------------------------------------------------------------- +# Class : ColGroup. + +class ColGroup ( object ): + """ + Manage a group of Column and/or ColGroup (recursive). + """ + + def __init__ ( self, tag ): + """ + Initialize an *empty* column group. 
Sub-group or columns must be + added afterwards. + """ + self.tag = tag + self.parent = None + self.order = None + self.depth = 0 + self.childs = [] + + def __iter__ ( self ): + return ColGroupIterator( self ) + + def __repr__ ( self ): + return 'ColGroup(depth={}, tag={}, childs={})'.format( self.depth, self.tag, len(self.childs )) + + @property + def kind ( self ): return Column.KIND_GROUP + + @property + def width ( self ): + """ Width of the whole ColGroup (sum of all its children's width). """ + width = 0 + for child in self.childs: + width += child.width + return width + + @property + def root ( self ): + """ Return the root of the tree. """ + if self.parent: + return self.parent.root + return self + + @property + def busWidth ( self ): + """ + Return the width of the group. This is the widest bus width of all the childs. + """ + busWidth = 0 + for child in self.childs: + busWidth = max( busWidth, child.busWidth ) + return busWidth + + def group ( self, child ): + """ Add a new child to the group. """ + self.childs.append( child ) + child.parent = self + self.depth = max( self.depth, child.depth+1 ) + + def unGroup ( self, child=None ): + """ Remove a child from the group (the child is *not* deleted). Without argument, remove self from its parent. """ + if child is None: + if not self.parent: + return + self.parent.unGroup( self ) + return + if child in self.childs: + self.childs.remove( child ) + child.parent = None + self.depth = 0 + for sibling in self.childs: + self.depth = max( self.depth, sibling.depth ) + self.depth += 1 + + def findChild ( self, tag ): + """ Recursively find a child in a group by its tag name. """ + if self.tag == tag: return self + for child in self.childs: + rchild = child.findChild( tag ) + if rchild: return rchild + return None + + def setCutCost ( self, order ): + order += 1 + self.order = order + for child in self.childs: + order = child.setCutCost( order ) + order += 1 + return order + + def reverse (self ): + """ + Reverse the order of the childs of the group *and* perform it recursively on each + child itself. 
+ """ + for child in self.childs: + child.reverse() + self.childs.reverse() + + def place ( self ): + """ Place childs/colums from left to rigth. """ + bb = Box() + for child in self.childs: + bb.merge( child.place() ) + return bb + + def showTree ( self, depth=0 ): + """ Display the tree rooted at this ColGroup. """ + print( '{}+ {}'.format( ' '*depth, self )) + for child in self.childs: + child.showTree( depth+1 ) + + +# -------------------------------------------------------------------- +# Class : ColGroupIterator. + +class ColGroupIterator ( object ): + """ + Provide an iterator over all the *leaf* instances of a ColGroup tree. + (all intermediate ColGroup are ignored) + """ + + def __init__ ( self, colGroup ): + self._colGroup = colGroup + self._index = 0 + self._subIter = None + + def __next__ ( self ): + while True: + try: + if self._subIter: + column = next( self._subIter ) + return column + except StopIteration: + self._subIter = None + if self._index < len(self._colGroup.childs): + column = self._colGroup.childs[self._index] + self._index += 1 + if isinstance(column,ColGroup): + self._subIter = iter( column ) + continue + return column + else: + raise StopIteration + + +# -------------------------------------------------------------------- +# Class : ColNaming. + +class ColNaming ( object ): + """ + Callable to generate the individual instances name of each bit + in a regular column. + """ + + def __init__ ( self, instFormat ): + self.instFormat = instFormat + + def __call__ ( self, bit ): + return self.instFormat.format( wbit=bit, bbit=(bit%8), byte=(bit//8) ) + + +# -------------------------------------------------------------------- +# Class : HeaderRow. + +class HeaderRow ( object ): + """ + Build the top row of the memory area datapath. Usually containing + the buffers for the local command drivers. 
+ """ + + def __init__ ( self, sram ): + self.sram = sram + self.row = {} + + def addInstanceAt ( self, inst, refInst ): + """ + Add an Instance in the header with it's reference instance, taken + from the to be associated Column. The reference instance gives a + hint about the X position the cell must be placed. + """ + if refInst not in self.row: + self.row[ refInst ] = [] + self.row[ refInst ].append( inst ) + + def place ( self, xstart, irow, direction ): + """ + Perform the placement of the header at ``(x,irow)``. Instances + in the row are placed as close as possible to the X position of + their reference instance (part of a Column). + """ + def getXMin ( inst ): + return inst.getAbutmentBox().getXMin() + + def getXMax ( inst ): + return inst.getAbutmentBox().getXMax() + + + trace( 610, ',+', '\tHeaderRow.place() {}\n'.format( DbU.getValueString(xstart) )) + xsorteds = [] + reverse = (direction != BaseSRAM.TO_RIGHT) + for refInst in self.row.keys(): + trace( 610, '\trefInst.getXMin() {}\n'.format( DbU.getValueString(refInst.getAbutmentBox().getXMin()) )) + xsorteds.append( refInst ) + bb = Box() + xsorteds.sort( key=getXMin, reverse=reverse ) + if direction == BaseSRAM.TO_RIGHT: + x = xstart + for refInst in xsorteds: + xmin = getXMin( refInst ) + if xmin > x: + x = xmin + for inst in self.row[ refInst ]: + self.sram.placeInstance( inst, x, irow, direction ) + bb.merge( inst.getAbutmentBox() ) + x += inst.getMasterCell().getAbutmentBox().getWidth() + else: + x = xstart + for refInst in xsorteds: + xmin = getXMax( refInst ) + if xmin < x: + x = xmin + for inst in self.row[ refInst ]: + self.sram.placeInstance( inst, x, irow, direction ) + bb.merge( inst.getAbutmentBox() ) + x -= inst.getMasterCell().getAbutmentBox().getWidth() + trace( 610, '-,' ) + return bb + + +# -------------------------------------------------------------------- +# Class : FoldState. 
+ +class FoldState ( object ): + """ + Manage information about how to fold the group tree and the + current information during the placement process. + """ + + def __init__ ( self, sram, fold ): + self.sram = sram + self.foldNb = fold + self.fold = 0 + + def setupDimensions ( self ): + """ + Compute folding coordinates. Must be called at the beginning + of the placement stage, after all instances have been created. + """ + foldedWidth = self.sram.rootGroup.width // self.foldNb + self.xmin = self.sram.decoder.width + self.xmax = self.sram.decoder.width + foldedWidth + self.x = self.xmin + self.irow = 0 + self.direction = BaseSRAM.TO_RIGHT + + def getPosition ( self ): + """ Return the position to put the next column. """ + return self.x, self.irow + + def forceFold ( self ): + if self.direction == BaseSRAM.TO_RIGHT: + self.direction = BaseSRAM.TO_LEFT + self.x = self.xmax + else: + self.direction = BaseSRAM.TO_RIGHT + self.x = self.xmin + self.irow += self.sram.rootGroup.busWidth + 1 + self.fold += 1 + + def addWidth ( self, width ): + """ Increment the currently placed width and update ``(x,irow)``. """ + if self.direction == BaseSRAM.TO_RIGHT: + self.x += width + else: + self.x -= width + #if self.direction == BaseSRAM.TO_RIGHT: + # if self.x + width <= self.xmax: + # self.x += width + # return + # self.direction = BaseSRAM.TO_LEFT + # self.x = self.xmax + #else: + # if self.x - width >= self.xmin: + # self.x -= width + # return + # self.direction = BaseSRAM.TO_RIGHT + # self.x = self.xmin + #self.irow += self.sram.rootGroup.busWidth + 1 + #self.fold += 1 + + +# -------------------------------------------------------------------- +# Class : BaseSRAM. + +class BaseSRAM ( object ): + """ + Base class for SRAM builder. Provides utility functions. + """ + TO_RIGHT = 1 + TO_LEFT = 2 + + def __init__ ( self, fold ): + """ + Create the base structure of a SRAM, which contains : + + * ``self.dffCell`` : The DFF standard cell which provides the single + bit storing. 
+ * ``self.rootGroup`` : A root column group named ``rootSRAM``, for the + matrix part (datapath). + * ``self.decoder`` : The soft block to store decoder cells. + * ``self.headers`` : The headers for the columns command and buffer + lines. + + The overall relative placement is organized as follow : :: + + +---------+-------------------------------------------+ + | | headers[1] (1 row) | + | +-------------------------------------------+ + | | | + | | Column area, fold 1 (N rows) | + | | | + | decoder +-------------------------------------------+ + | | headers[0] (1 row) | + | +-------------------------------------------+ + | | | + | | Column area, fold 0 (N rows) | + | | | + +---------+-------------------------------------------+ + """ + self.confLib = StdCellConf() + self.foldState = FoldState( self, fold ) + self.cell = None + self.foldTags = [] + self.dffCell = af.getCell( 'sff1_x4', CRL.Catalog.State.Views ) + self.position = Transformation() + self.sliceHeight = self.dffCell.getAbutmentBox().getHeight() + self.rootGroup = ColGroup( 'rootSRAM' ) + self.busses = {} + self.decoder = None + self.toHeaders = [] + self.headers = [ HeaderRow( self ) for row in range(fold) ] + + @property + def fold ( self ): + return self.foldState.fold + + def doFoldAtColumn ( self, column ): + if column.tag in self.foldTags: + self.foldState.forceFold() + + def getNet ( self, name, create=True ): + """ + Find a Net by name. If it doesn't exists and ``create`` is set to ``True``, + add it to the netlist. + """ + net = self.cell.getNet( name ) + if net: + return net + if not create: + return None + return Net.create( self.cell, name ) + + def addExternalNet ( self, name, direction, kind=Net.Type.LOGICAL ): + """ + Add a Net to the cell interface. 
+ """ + net = self.getNet( name ) + net.setExternal ( True ) + net.setDirection( direction ) + net.setType ( kind ) + if kind == Net.Type.POWER or kind == Net.Type.GROUND: + net.setGlobal( True ) + return net + + def addInstance ( self, masterName, instName, netMapNames ): + """ + Create an Instance named ``instName`` of model ``masterName`` and + connect it's Plugs according to the dictionary ``netMapNames``. + + .. code:: python + + self.addInstance( 'a2_x2' + , 'decod_a2_1' + , { 'i0' : 'net_input_0' + , 'i1' : 'net_input_1' + , 'q' : 'net_output_X' } ) + """ + masterCell = af.getCell( masterName, CRL.Catalog.State.Views ) + inst = Instance.create( self.cell, instName, masterCell ) + for masterNetName, netName in netMapNames.items(): + masterNet = masterCell.getNet( masterNetName ) + net = self.getNet( netName ) + plug = inst.getPlug( masterNet ) + plug.setNet( net ) + return inst + + def connect ( self, instName, masterNetName, netName ): + """ + Connect the Plug ``masterNetName`` of instance ``instName`` to the + net ``netName``. + """ + inst = self.cell.getInstance( instName ) + masterNet = inst.getMasterCell().getNet( masterNetName ) + net = self.getNet( netName ) + inst.getPlug( masterNet ).setNet( net ) + + def placeInstance ( self, inst, x, irow, direction=TO_RIGHT, reverse=False ): + """ + Place an instance at a position defined by ``(x,irow)`` where : + + * ``x`` : the usual X coordinate. + * ``irow`` : the row into which to put the cell, the Y coordinate + is computed from it, accounting for the X axis + flipping that occurs on one row over two. + + The position is relative to the bottom left corner of the design + given by ``self.position``. + + .. note:: ``self.position`` should not contains rotations, unmanaged + for now. 
+ """ + orients = { BaseSRAM.TO_RIGHT : [ Transformation.Orientation.ID + , Transformation.Orientation.MY ] + , BaseSRAM.TO_LEFT : [ Transformation.Orientation.MX + , Transformation.Orientation.R2 ] + } + if reverse: + if direction == BaseSRAM.TO_RIGHT: + x += inst.getMasterCell().getAbutmentBox().getWidth() + direction = BaseSRAM.TO_LEFT + else: + x -= inst.getMasterCell().getAbutmentBox().getWidth() + direction = BaseSRAM.TO_RIGHT + y = irow*self.sliceHeight + orient = orients[ direction ][ 0 ] + if irow % 2: + y += self.sliceHeight + orient = orients[ direction ][ 1 ] + trace( 610, '\tBaseSRAM.placeInstance() x={} y={} orient={} {}\n' \ + .format( DbU.getValueString(x), DbU.getValueString(y), orient, inst )) + transf = Transformation( x, y, orient ) + self.position.applyOn( transf ) + inst.setTransformation( transf ) + inst.setPlacementStatus( Instance.PlacementStatus.PLACED ) + return inst.getAbutmentBox() + + def findFoldColumns ( self ): + """ + Find the cuts between columns where the wiring is minimal. + Based on the ColGroup tree, assuming that the deeper a column is + in the tree the more they are closely connected and must not be + separateds. 
+ """ + trace( 610, ',+', '\tBaseSRAM.findFoldcolumns()\n' ) + self.rootGroup.setCutCost( 0 ) + prevOrder = 0 + cutCosts = {} + count = 0 + for column in self.rootGroup: + cutCost = column.order-prevOrder + if cutCost in cutCosts: + cutCosts[ cutCost ].append(( count, column.tag )) + else: + cutCosts[ cutCost ] = [( count, column.tag )] + trace( 610, '\t{:>4} {:>4} {:>4} {}\n'.format( count + , column.order + , column.order-prevOrder + , column )) + prevOrder = column.order + count += 1 + keys = list( cutCosts.keys() ) + keys.sort() + for key in keys: + trace( 610, '\tcutCost[ {} ] = \n'.format( key )) + for order, tag in cutCosts[key]: + trace( 610, '\t | {:>3d} {}\n'.format( order, tag )) + trace( 610, '-,' ) + + def placeAt ( self, position=None ): + """ + Perform the placement of all the various area of the SRAM at ``position`` (a fresh identity Transformation when None). + For the overall layout, see ``__init__()``. + """ + self.findFoldColumns() + self.position = Transformation() if position is None else position + self.foldState.setupDimensions() + with UpdateSession(): + bb = Box() + bb.merge( self.decoder.place( 0 ) ) + bb.merge( self.rootGroup.place() ) + for inst, refInst in self.toHeaders: + self.headers[ refInst.fold ].addInstanceAt( inst, refInst ) + for i in range(len(self.headers)): + trace( 610, ',+', 'Place row header {} {}\n'.format( i, self.headers[i].row )) + if i % 2: + xstart = bb.getXMax() + direction = BaseSRAM.TO_LEFT + else: + xstart = self.decoder.width + direction = BaseSRAM.TO_RIGHT + bb.merge( self.headers[i].place( xstart + , self.rootGroup.busWidth*(i + 1) + i + , direction )) + trace( 610, '-,' ) + self.cell.setAbutmentBox( bb ) + + def showTree ( self ): + """ + Display the Column tree of the SRAM. + """ + self.rootGroup.showTree() diff --git a/cumulus/src/plugins/sram/sram_256x32.py b/cumulus/src/plugins/sram/sram_256x32.py new file mode 100644 index 00000000..12d0e60d --- /dev/null +++ b/cumulus/src/plugins/sram/sram_256x32.py @@ -0,0 +1,493 @@ + +# This file is part of the Coriolis Software. 
+# Copyright (c) Sorbonne Université 2022-2022, All Rights Reserved +# +# +-----------------------------------------------------------------+ +# | C O R I O L I S | +# | C u m u l u s - P y t h o n T o o l s | +# | | +# | Author : Jean-Paul CHAPUT | +# | E-mail : Jean-Paul.Chaput@lip6.fr | +# | =============================================================== | +# | Python : "./plugins/sram_256x32.py" | +# +-----------------------------------------------------------------+ + + +""" +Verilog description of ``spram_256x32`` (256 words of 32 bits). +This descripion is the ASIC part of the SPRAM exctracted from +FreeCores : :: + + https://github.com/freecores/ethmac.git + +.. code:: Verilog + + module eth_spram_256x32( + // Generic synchronous single-port RAM interface + clk, rst, ce, we, oe, addr, di, dato + + // Generic synchronous single-port RAM interface + input clk; // Clock, rising edge + input rst; // Reset, active high + input ce; // Chip enable input, active high + input [ 3: 0] we; // Write enable input, active high + input oe; // Output enable input, active high + input [ 7: 0] addr; // address bus inputs + input [31: 0] di; // input data bus + output [31: 0] dato; // output data bus + + reg [ 7: 0] mem0 [255:0]; + reg [15: 8] mem1 [255:0]; + reg [23:16] mem2 [255:0]; + reg [31:24] mem3 [255:0]; + wire [31: 0] q; + reg [ 7: 0] raddr; + + // Data output drivers + assign dato = (oe & ce) ? q : {32{1'bz}}; + + // RAM read and write + // read operation + always@(posedge clk) + if (ce) + raddr <= addr; // read address needs to be registered to read clock + + assign q = rst ? {32{1'b0}} : { mem3[raddr] + , mem2[raddr] + , mem1[raddr] + , mem0[raddr] }; + + // write operation + always@(posedge clk) + begin + if (ce && we[3]) mem3[addr] <= di[31:24]; + if (ce && we[2]) mem2[addr] <= di[23:16]; + if (ce && we[1]) mem1[addr] <= di[15: 8]; + if (ce && we[0]) mem0[addr] <= di[ 7: 0]; + end + endmodule + + +Provisional results +~~~~~~~~~~~~~~~~~~~ + +.. 
note:: All length are in micro-meters. + ++--------------+-----------------------------+-----------------------------+ +| Kind | Generator | Yosys | ++==============+=============================+=============================+ +| # Gates | 23209 (-25.4%) | 32121 | ++--------------+-----------------------------+-----------------------------+ +| 1 Fold | ++--------------+-----------------------------+-----------------------------+ +| Area | 7182 x 330 (-5.5%) | 7380 x 340 | ++--------------+-----------------------------+-----------------------------+ +| Wirelength | 1841036 (-4.3%) | 1924153 | ++--------------+-----------------------------+-----------------------------+ +| 2 Fold | ++--------------+-----------------------------+-----------------------------+ +| Area | 3599 x 660 (-5.3%) | 3690 x 680 | ++--------------+-----------------------------+-----------------------------+ +| Wirelength | 1670455 (-6.3%) | 1782558 | ++--------------+-----------------------------+-----------------------------+ +| 4 Fold | ++--------------+-----------------------------+-----------------------------+ +| Area | 1812 x 1320 (-4.6%) | 1900 x 1320 | ++--------------+-----------------------------+-----------------------------+ +| Wirelength | 1699810 (-1.5%) | 1726436 | ++--------------+-----------------------------+-----------------------------+ + +Conclusions that we can draw from those results are : + +1. The generator version uses subtantially less gates than the Yosys one. + As the both SRAM uses the exact same number of SFFs, the difference is + only due to the decoder for the control of input and output muxes. + +2. Notwithanding having less gates the generator version uses similar areas, + which means that we use fewer but significantly *bigger* cells. + +3. The FlexLib library supplied for SkyWater 130nm do not contains all + SxLib one, effectively restricting our choices. + + In particular, to build the output multiplexer we only have mx2 and + mx3 cells, which are large. 
The density of the SRAM could be much + increased if we did have nmx2 and nmx3. We could also try to synthesise + the tree using nandX and norX but we are short of time. + + Furthermore for the output multiplexers, as it is a controlled case, + we may also uses three-state drivers cells (which have not been + ported either). + +.. note:: Cell width in the SkyWater 130 port of FlexLib: + + ============== ===== + Cell Width + ============== ===== + mx2_x2 7 + mx3_x2 11 + nand2_x0 2 + nand3_x0 3 + nand4_x0 4 + nor2_x0 2 + ============== ===== + + 1. mx2_x2 + mx3_x2 = 18 + 2. 9 * nand2_x0 = 18 + 3. 4 * nand3_x0 + nand4_x0 = 16 + 4. 6 * nand2_x0 + nor2_x0 = 14 +""" + + +import sys +import re +import traceback +import helpers +from helpers.io import ErrorMessage, WarningMessage +from helpers.overlay import UpdateSession +from helpers import trace, l, u, n +import plugins +from Hurricane import Breakpoint, DbU, Box, Net, Cell, Instance, \ + Transformation, PythonAttributes +import CRL +from Foehn import FoehnEngine, DagExtension +from plugins.chip.configuration import GaugeConf +from plugins.sram.sram import Bus, Column, ColBlock, ColGroup, \ + HeaderRow, BaseSRAM + + +""" +Simple Standard cells based SRAM generator. +""" + + +af = CRL.AllianceFramework.get() + + +# -------------------------------------------------------------------- +# Class : SRAM_256x32. + +class SRAM_256x32 ( BaseSRAM ): + """ + Build & place a SRAM of 256 words of 32 bits. 
+ """ + BIT_GROUP_FMT = 'bit_addr{:04d}_g' + MUX_GROUP_FMT = 'bits_{}_g' + + def __init__ ( self, fold ): + BaseSRAM.__init__( self, fold ) + if fold == 1: + pass + elif fold == 2: + self.foldTags = [ 'imux_addr0128' ] + elif fold == 4: + self.foldTags = [ 'omux_0_to_127', 'imux_addr0128', 'imux_addr0240' ] + #self.foldTags = [ 'imux_addr0064', 'imux_addr0128', 'imux_addr0192' ] + else: + raise ErrorMessage( 1, 'SRAM_256x32.__init__(): Unsupported fold {}, valid values are 1, 2, 4.'.format( fold )) + self.cell = af.createCell( 'spram_256x32' ) + self.mx2Cell = af.getCell( 'mx2_x2', CRL.Catalog.State.Views ) + self.mx3Cell = af.getCell( 'mx3_x2', CRL.Catalog.State.Views ) + with UpdateSession(): + self.buildInterface() + self.decoder = ColBlock( self, 'decod', 33 ) + for addr in range(256): + bitGroup = ColGroup( SRAM_256x32.BIT_GROUP_FMT.format( addr )) + self.rootGroup.group( bitGroup ) + bitGroup.group( Column( self + , self.mx2Cell + , 'imux_addr{:04d}'.format( addr ) + , '_byte{byte}_{bbit}' + , 32 )) + bitGroup.group( Column( self + , self.dffCell + , 'bit_addr{:04d}'.format( addr ) + , '_byte{byte}_{bbit}' + , 32 )) + bus = Bus( self, 'imux_addr{:04d}_b_q({{}})'.format(addr), 32 ) + bitGroup.childs[0].setBusNet( 'q', bus ) + bitGroup.childs[1].setBusNet( 'i', bus ) + bus = Bus( self, 'bit_addr{:04d}_b_q({{}})'.format(addr), 32 ) + bitGroup.childs[0].setBusNet( 'i0', bus ) + bitGroup.childs[1].setBusNet( 'q', bus ) + bus = Bus( self, 'di({})', 32 ) + bitGroup.childs[0].setBusNet( 'i1', bus ) + bitGroup.childs[1].setCmdNet( 'ck', self.getNet( 'clk' )) + omuxGroupsCurr = [] + omuxGroupsNext = [] + muxDepth = 0 + for i in range(256//4): + childs = [] + for addr in range(i*4, (i+1)*4): + tag = SRAM_256x32.BIT_GROUP_FMT.format( addr ) + childs.append( self.rootGroup.findChild( tag )) + childs[-1].unGroup() + omuxGroupsCurr.append( self._doMux4( childs, muxDepth )) + while len(omuxGroupsCurr) >= 4: + trace( 610, '\tGrouping {} elements.\n'.format( len(omuxGroupsCurr 
))) + muxDepth += 1 + for i in range(len(omuxGroupsCurr)//4): + omuxGroupsNext.append( self._doMux4( omuxGroupsCurr[i*4:(i+1)*4], muxDepth )) + omuxGroupsCurr = omuxGroupsNext + omuxGroupsNext = [] + for group in omuxGroupsCurr: + self.rootGroup.group( group ) + inst = self.addInstance( 'inv_x2' + , 'nrst_inv' + , { 'i' : 'rst' + , 'nq' : 'nrst' + } + ) + self.decoder.addInstance( 0, inst ) + for child in self.rootGroup.childs[0].childs: + if child.kind == Column.KIND_COLUMN: + if child.insts[0].getMasterCell() != self.mx3Cell: + continue + rstCol = Column( self + , af.getCell( 'a2_x2', CRL.Catalog.State.Views ) + , 'omux_nrst' + , '_byte{byte}_{bbit}' + , 32 ) + busOMux = Bus( self, child.tag+'_b_q({})', 32 ) + busDato = Bus( self, 'dato({})', 32 ) + child .setBusNet( 'q' , busOMux ) + rstCol.setBusNet( 'i0', busOMux ) + rstCol.setCmdNet( 'i1', self.getNet('nrst') ) + rstCol.setBusNet( 'q' , busDato ) + self.rootGroup.group( rstCol ) + self.buildDecoder() + af.saveCell( self.cell, CRL.Catalog.State.Logical ) + + def _doMux4 ( self, childs, muxDepth ): + """ + Build a 4 entry mux. It uses a mux2 / mux3 combination. + Returns a newly built group.
+ Entry selection given (cmd0,cmd1) : :: + + 00 ==> i0 (mux2.i0) + 01 ==> i1 (mux2.i1) + 10 ==> i2 (mux3.i2) + 11 ==> i3 (mux3.i1) + """ + tags = [] + for child in childs: + tags.append( child.tag ) + childIndex = 1 if muxDepth == 0 else 4 + muxTag = SRAM_256x32._mergeOMuxTags( tags ) + mux2Tag = SRAM_256x32._mergeOMuxTags( tags[0:2] ) + mux3Tag = SRAM_256x32._mergeOMuxTags( tags ) + muxGroup = ColGroup( muxTag+'_g' ) + trace( 610, ',+', '\tSRAM_256x32._doMux4() {} + {} -> {}\n' \ + .format( mux2Tag, mux3Tag, muxTag )) + mux2Col = Column( self + , self.mx2Cell + , mux2Tag + , '_byte{byte}_{bbit}' + , 32 ) + mux2Col.setCmdNet( 'cmd', self.getNet( 'raddr({})'.format(muxDepth*2) )) + mux3Col = Column( self + , self.mx3Cell + , mux3Tag + , '_byte{byte}_{bbit}' + , 32 ) + mux3Col.setCmdNet( 'cmd0', self.getNet( 'raddr({})'.format(muxDepth*2 + 1) )) + mux3Col.setCmdNet( 'cmd1', self.getNet( 'raddr({})'.format(muxDepth*2 ) )) + muxGroup.group( childs[0] ) + muxGroup.group( mux2Col ) + muxGroup.group( childs[1] ) + muxGroup.group( childs[2] ) + muxGroup.group( mux3Col ) + muxGroup.group( childs[3] ) + bus0 = Bus( self, tags[0][:-2]+'_b_q({})', 32 ) + bus1 = Bus( self, tags[1][:-2]+'_b_q({})', 32 ) + bus2 = Bus( self, tags[2][:-2]+'_b_q({})', 32 ) + bus3 = Bus( self, tags[3][:-2]+'_b_q({})', 32 ) + busMx2 = Bus( self, mux2Tag+'_b_q({})', 32 ) + childs[0].childs[ childIndex ].setBusNet( 'q', bus0 ) + childs[1].childs[ childIndex ].setBusNet( 'q', bus1 ) + childs[2].childs[ childIndex ].setBusNet( 'q', bus2 ) + childs[3].childs[ childIndex ].setBusNet( 'q', bus3 ) + mux2Col.setBusNet( 'i0', bus0 ) + mux2Col.setBusNet( 'i1', bus1 ) + mux2Col.setBusNet( 'q' , busMx2 ) + mux3Col.setBusNet( 'i0', busMx2 ) + mux3Col.setBusNet( 'i2', bus2 ) + mux3Col.setBusNet( 'i1', bus3 ) + childs[1].reverse() + childs[3].reverse() + trace( 610, '-,' ) + return muxGroup + + @staticmethod + def _mergeOMuxTags ( tags ): + """ + Merge two output mux column tags. 
We assume that we merge only + contiguous tags. + + Example: :: + + 'omux_0_to_1' + 'omux_2_to_3' ==> 'omux_0_to_3' + """ + vectorRe = re.compile( '^omux_(?P<lsb>\d+)_to_(?P<msb>\d+)' ) + addrs = [] + for tag in tags: + end = -2 if tag.endswith('_g') else 0 + if tag.startswith('bit'): + addrs.append( int( tag[8:end] )) + elif tag.startswith('omux'): + m = vectorRe.match( tag ) + addrs += [ int(m.group('lsb')), int(m.group('msb')) ] + addrs.sort() + omuxTag = 'omux' + omuxTag = 'omux_{}_to_{}'.format( addrs[0], addrs[-1] ) + return omuxTag + + def buildInterface ( self ): + """ Build the interface of the SRAM. """ + self.addExternalNet( 'clk', Net.Direction.DirIn, Net.Type.CLOCK ) + self.addExternalNet( 'rst', Net.Direction.DirIn ) + self.addExternalNet( 'ce' , Net.Direction.DirIn ) + for bit in range(4): + self.addExternalNet( 'we({})'.format(bit) , Net.Direction.DirIn ) + self.addExternalNet( 'oe' , Net.Direction.DirIn ) + for bit in range(8): + self.addExternalNet( 'addr({})'.format(bit) , Net.Direction.DirIn ) + for bit in range(32): + self.addExternalNet( 'di({})'.format(bit) , Net.Direction.DirIn ) + for bit in range(32): + self.addExternalNet( 'dato({})'.format(bit) , Net.Direction.DirOut ) + self.addExternalNet( 'vdd' , Net.Direction.DirIn, Net.Type.POWER ) + self.addExternalNet( 'vss' , Net.Direction.DirIn, Net.Type.GROUND ) + + def _getDecodNetName ( self, oneHot, addrWidth ): + """ + Build a net name for a particular oneHot bit in the range covered by addrWidth. + The first part is the address lines and the second the value they decode. + If the oneHot value exceeds 2^addrWidth, we use the *next* address lines.
+ + ======== =========== ======================== + oneHot addrWidth net name + ======== =========== ======================== + 0 4 'decod_3_2_1_0_0000' + 1 4 'decod_3_2_1_0_0001' + 2 4 'decod_3_2_1_0_0010' + 3 4 'decod_3_2_1_0_0011' + 4 4 'decod_3_2_1_0_0100' + 15 4 'decod_3_2_1_0_1111' + 16 4 'decod_7_6_5_4_0000' + 17 4 'decod_7_6_5_4_0001' + ======== =========== ======================== + """ + netName = '' + indexFirstBit = (oneHot >> addrWidth) * addrWidth + for bit in range(indexFirstBit, indexFirstBit + addrWidth): + netName = '{}_'.format(str( bit )) + netName + divider = 1 << addrWidth + netName = '{}{:0{width}b}'.format( netName, oneHot % divider, width=addrWidth ) + return netName + + def _getDecodInstConf ( self, oneHot, addrWidth ): + """ + Compute the information needed to instantiate one cell of one level of + the decoder. For the first level of one hot (addrWidth == 2), the inputs + are just direct or inverted address bits. For the upper level we + combine the outputs of the previous one hot level, that is the one with + addrWidth/2 to generate the current one.
+ """ + instConf = [] + if addrWidth == 2: + indexFirstBit = (oneHot >> addrWidth) * addrWidth + valueAddr = oneHot % (1 << addrWidth) + trunkName = self._getDecodNetName( oneHot, addrWidth ) + instConf.append( 'a2_x2' ) + instConf.append( 'decod_a2_{}'.format( trunkName )) + instConf.append( {} ) + for i in range(2): + inv = '' if (valueAddr & (1 << i)) else 'n_' + instConf[2][ 'i{}'.format(i) ] = '{}addr({})'.format( inv, indexFirstBit+i ) + instConf[2][ 'q' ] = 'decod_'+trunkName + elif addrWidth == 4 or addrWidth == 8: + halfWidth = addrWidth>>1 + halfMask = 0 + for i in range(halfWidth): + halfMask |= 1 << i + indexFirstBit = (oneHot >> addrWidth) * addrWidth + valueAddr = oneHot % (1 << addrWidth) + trunkName = self._getDecodNetName( oneHot, addrWidth ) + instConf.append( 'a2_x2' ) + instConf.append( 'decod_a2_{}'.format( trunkName )) + instConf.append( {} ) + offset = (oneHot >> addrWidth) << (halfWidth+1) + oneHot0 = (oneHot & halfMask) + offset + instConf[2][ 'i0' ] = 'decod_'+self._getDecodNetName( oneHot0, halfWidth ) + oneHot1 = ((oneHot >> halfWidth) & halfMask) + (1<<(halfWidth)) + offset + instConf[2][ 'i1' ] = 'decod_'+self._getDecodNetName( oneHot1, halfWidth ) + instConf[2][ 'q' ] = 'decod_'+trunkName + trace( 610, '\t{:08b} {:3d}:{} + {:3d}:{} => {:3d}::{:08b}:{}\n' \ + .format( halfMask + , oneHot0, self._getDecodNetName( oneHot0, halfWidth ) + , oneHot1, self._getDecodNetName( oneHot1, halfWidth ) + , oneHot , oneHot, trunkName )) + return instConf + + def buildDecoder ( self ): + trace( 610, ',+', '\tSRAM_256x32.buildDecoder()\n' ) + for bit in range(8): + inst = self.addInstance( 'mx2_x2' + , 'raddr_imux_{}'.format(bit) + , { 'cmd' : 'ce' + , 'i0' : 'raddr({})'.format(bit) + , 'i1' : 'addr({})'.format(bit) + , 'q' : 'raddr_imux_q({})'.format(bit) + } + ) + self.decoder.addInstance( bit * 4 + 1, inst ) + inst = self.addInstance( 'sff1_x4' + , 'raddr_sff_{}'.format(bit) + , { 'i' : 'raddr_imux_q({})'.format(bit) + , 'q' : 
'raddr({})'.format(bit) + } + ) + self.decoder.addInstance( bit * 4, inst ) + self.connect( 'raddr_sff_{}'.format(bit), 'ck', 'clk' ) + for bit in range(8): + inst = self.addInstance( 'inv_x1' + , 'decod_inv_{}'.format(bit) + , { 'i' : 'addr({})'.format(bit) + , 'nq' : 'n_addr({})'.format(bit) + } + ) + self.decoder.addInstance( bit*4 + 1, inst ) + for oneHot in range(16): + trace( 610, '\t{}\n'.format( self._getDecodNetName(oneHot,2) )) + instDatas = self._getDecodInstConf( oneHot, 2 ) + inst = self.addInstance( instDatas[0], instDatas[1], instDatas[2] ) + self.decoder.addInstance( oneHot*2 + 1, inst ) + for oneHot in range(32): + instDatas = self._getDecodInstConf( oneHot, 4 ) + inst = self.addInstance( instDatas[0], instDatas[1], instDatas[2] ) + self.decoder.addInstance( oneHot + (oneHot+1)%2, inst ) + for oneHot in range(256): + bitTag = 'bit_addr{:04d}'.format( oneHot ) + imuxTag = 'imux_addr{:04d}'.format( oneHot ) + instDatas = self._getDecodInstConf( oneHot, 8 ) + inst = self.addInstance( instDatas[0], instDatas[1], instDatas[2] ) + dffCol = self.rootGroup.findChild( bitTag ) + imuxCol = self.rootGroup.findChild( imuxTag ) + self.toHeaders.append(( inst, imuxCol.insts[0] )) + for we in range(4): + cmdNetName = 'decod_addr{:04d}_we({})'.format( oneHot, we ) + inst = self.addInstance( 'a3_x2' + , 'decod_a3_we_{}_{}'.format(we,oneHot) + , { 'i0' : instDatas[2]['q'] + , 'i1' : 'ce' + , 'i2' : 'we({})'.format(we) + , 'q' : cmdNetName + } + ) + self.toHeaders.append(( inst, dffCol.insts[0] )) + for bit in range(8): + self.connect( 'imux_addr{:04d}_byte{byte}_{bbit}'.format( oneHot, byte=we, bbit=bit ) + , 'cmd' + , cmdNetName + ) + trace( 610, '-,' ) diff --git a/cumulus/src/plugins/sram/sramplacer2.py b/cumulus/src/plugins/sram/sramplacer2.py index fd3827f2..1d7b55b0 100644 --- a/cumulus/src/plugins/sram/sramplacer2.py +++ b/cumulus/src/plugins/sram/sramplacer2.py @@ -51,6 +51,8 @@ Automatic placement of a Yosys generated SRAM own structure. 
26 signals takes up more than half the horizontal routing capacity of a slice (40), this result in an unroutable design, the bits are kept into one row each. + 832 gates is for the TSMC 180nm, for SkyWater 130nm we got + 976 gates on the third level. Conclusions ~~~~~~~~~~~