Add AXI stream replay buffer

This implements an AXI stream buffer which allows replaying of the first portion of each packet. The intent is to simplify the implementation of CSMA/CD. This requires keeping 56 bytes of data to "replay" (slot time minus the preamble). After these bytes are transmitted, we can only get late collisions. We always read from the buffer, as this simplifies the implementation compared to some kind of hybrid fifo/skid buffer approach. The primary design problem faced is in determining when it's OK to overwrite the first byte in the packet. A naïve approach might be to allow overwriting whenever the slave reads the last byte. However, in the case of a 54-byte packet, we will still need to allow replaying at this point (in case there is a collision on the last byte). We can't just wait for m_axis_ready to go high, because that would violate the AXI stream protocol. To solve this, the slave must assert the done signal when it is finished with the packet. Signed-off-by: Sean Anderson <seanga2@gmail.com>
2022-11-30 15:23:59 -05:00 · 2022-11-30 15:23:59 -05:00 · 52325f241b
parent be2bded61e
commit 52325f241b
4 changed files with 281 additions and 1 deletions
--- a/2
+++ b/2
@ -91,7 +91,7 @@ endef
 	$(run-vvp)
 MODULES := pcs_rx pcs_tx pmd_dp83223_rx nrzi_encode nrzi_decode scramble descramble mdio mdio_io
-MODULES += mii_io_rx mii_io_tx mdio_regs
+MODULES += mii_io_rx mii_io_tx mdio_regs phy_core axis_replay_buffer
 .PHONY: test
 test: $(addsuffix .fst,$(MODULES)) $(addsuffix .synth.fst,$(MODULES))
--- a/rtl/axis_replay_buffer.v
+++ b/rtl/axis_replay_buffer.v
@ -0,0 +1,182 @@
 // SPDX-License-Identifier: AGPL-3.0-Only
 /*
 * Copyright (C) 2022 Sean Anderson <seanga2@gmail.com>
 *
 * This module implements a "replay buffer" for an AXI stream, allowing the
 * first BUF_SIZE cycles of a packet to be replayed. This may be done by
 * asserting replay while replayable is true.
 *
 * replayable will remain true until BUF_SIZE + 1 handshakes have occurred
 * without a replay. In particular, it is possible to restart a packet
 * even after a handshake with m_axis_last set. To support this late replay
 * feature, done must be asserted when the consumer does not wish to
 * perform any more replays.
 *
 * In general, this buffer will add two cycles of latency. Additionally, there
 * may be some latency when replayable goes low. This is because the slave
 * interface stalls to avoid overwriting the first part of the packet. However,
 * it will still read ahead to the physical end of the buffer. This will result
 * in no stall as long as BUF_SIZE is at least three less than a power of two.
 *
 * Only axis_data is provided. For user, keep, etc. concatenate them into
 * axis_data. 
 */
 `include "common.vh"
 /*
  * Buffers an AXI stream so that the start of the current packet can be re-sent
  * on request. See the header comment at the top of this file for the contract
  * of replay, done, and replayable.
  */
 module axis_replay_buffer (
 	input clk,
 	/* AXI Stream slave */
 	input [DATA_WIDTH - 1:0] s_axis_data,
 	input s_axis_valid,
 	output reg s_axis_ready,
 	input s_axis_last,
 	/* AXI Stream master */
 	output reg [DATA_WIDTH - 1:0] m_axis_data,
 	output reg m_axis_valid,
 	input m_axis_ready,
 	output reg m_axis_last,
 	/* Control */
 	/*
 	 * Replay the packet. May be asserted any time replayable is high,
 	 * including after BUF_SIZE handshakes have occurred and after
 	 * m_axis_last is high. Must not be asserted when replayable is low.
 	 */
 	input replay,
 	/*
 	 * Force replayable low. This must be asserted for packets <= BUF_SIZE,
 	 * since they may still be replayed even after the end of the packet.
 	 */
 	input done,
 	/*
 	 * High when replay may be asserted.
 	 */
 	output reg replayable
 );
 	/* Width of s_axis_data and m_axis_data, in bits */
 	parameter DATA_WIDTH	= 9;
 	/* Number of cycles at the start of a packet which may be replayed */
 	parameter BUF_SIZE	= 54;
 	/* Address bits of the buffer, which holds 2 ** BUF_WIDTH entries */
 	localparam BUF_WIDTH	= $clog2(BUF_SIZE + 1);
 	/*
 	 * Naming: *_next signals are next-state values computed in the
 	 * combinational block below and registered on the clock edge. The
 	 * s_axis_*_last registers hold the slave inputs from the previous clock
 	 * (so s_axis_last_last is the previous cycle's s_axis_last).
 	 */
 	reg [DATA_WIDTH - 1:0] s_axis_data_last;
 	reg s_axis_valid_last, s_axis_last_last, s_axis_ready_next;
 	reg m_axis_valid_next, m_axis_last_next;
 	/* Set once a beat with m_axis_last has been handshaken on the master side */
 	reg sent_last, sent_last_next;
 	reg [DATA_WIDTH - 1:0] buffer [(2 ** BUF_WIDTH) - 1:0];
 	/*
 	 * Read (m_ptr) and write (s_ptr) pointers. They are one bit wider than
 	 * the buffer address so that full and empty can be told apart.
 	 */
 	reg [BUF_WIDTH:0] m_ptr, m_ptr_next, s_ptr, s_ptr_next;
 	/* Buffer address of the final beat of the packet; meaningful while last is set */
 	reg [BUF_WIDTH - 1:0] last_ptr, last_ptr_next;
 	/* NOTE(review): s_data and m_data are not referenced anywhere else in this module */
 	reg [DATA_WIDTH - 1:0] s_data, m_data;
 	/* Set once the final beat of the packet has been written to the buffer */
 	reg last, last_next;
 	reg full, empty, replayable_next, we, re;
 	/* There is no reset input; these are the power-on values */
 	initial begin
 		m_ptr = 0;
 		s_ptr = 0;
 		last = 0;
 		replayable = 1;
 		s_axis_valid_last = 0;
 		s_axis_last_last = 0;
 		s_axis_ready = 1;
 		m_axis_valid = 0;
 		m_axis_last = 0;
 		sent_last = 0;
 	end
 	/*
 	 * Next-state logic. Later assignments override earlier ones, so the
 	 * order of the if statements below sets their priority.
 	 */
 	always @(*) begin
 		/* Pointers equal: empty. Equal except for the extra top bit: full. */
 		empty = s_ptr == m_ptr;
 		full = s_ptr == { ~m_ptr[BUF_WIDTH], m_ptr[BUF_WIDTH - 1:0] };
 		we = 0;
 		s_ptr_next = s_ptr;
 		last_next = last;
 		last_ptr_next = last_ptr;
 		/*
 		 * Write the registered slave data into the buffer.
 		 * NOTE(review): this pairs the previous cycle's valid with the
 		 * current value of s_axis_ready; confirm that this matches the
 		 * intended handshake timing.
 		 */
 		if (s_axis_valid_last && s_axis_ready) begin
 			we = 1;
 			s_ptr_next = s_ptr + 1;
 			/* Remember where the packet ends so the read side can flag it */
 			if (s_axis_last_last) begin
 				last_next = 1;
 				last_ptr_next = s_ptr;
 			end
 		end
 		/*
 		 * While replayable, ready is high only when "all address bits of
 		 * s_ptr are set" equals the top bit of s_ptr (the reduction &
 		 * binds tighter than ==). Presumably this keeps the write pointer
 		 * from wrapping onto the start of the packet while it may still
 		 * be replayed; confirm. Otherwise only a full buffer stalls the
 		 * slave.
 		 */
 		if (replayable)
 			s_axis_ready_next = &s_ptr[BUF_WIDTH - 1:0] == s_ptr[BUF_WIDTH];
 		else
 			s_axis_ready_next = !full;
 		/* Accept nothing more once the end of the packet has been stored */
 		if (last_next)
 			s_axis_ready_next = 0;
 		/* read the next datum (if it's available)... */
 		m_axis_valid_next = !empty;
 		m_axis_last_next = last && m_ptr[BUF_WIDTH - 1:0] == last_ptr;
 		re = !empty;
 		m_ptr_next = m_ptr + !empty;
 		/* ...except if we need to stall */
 		if (m_axis_valid && !m_axis_ready) begin
 			m_axis_valid_next = m_axis_valid;
 			m_axis_last_next = m_axis_last;
 			re = 0;
 			m_ptr_next = m_ptr;
 		end
 		replayable_next = replayable;
 		sent_last_next = sent_last;
 		/*
 		 * On a master handshake, replayable is dropped once m_ptr equals
 		 * BUF_SIZE + 1, unless replay is asserted in the same cycle.
 		 */
 		if (m_axis_valid && m_axis_ready) begin
 			replayable_next = replayable && (replay || m_ptr != BUF_SIZE + 1);
 			sent_last_next = sent_last || m_axis_last;
 		end
 		if (done)
 			replayable_next = 0;
 		/*
 		 * The whole packet has been sent and can no longer be replayed:
 		 * return to the initial state, ready for the next packet.
 		 */
 		if (sent_last && !replayable) begin
 			m_ptr_next = 0;
 			s_ptr_next = 0;
 			last_next = 0;
 			replayable_next = 1;
 			sent_last_next = 0;
 		end
 		/*
 		 * Rewind the read pointer to the start of the packet and drop
 		 * valid and last for the next cycle. This comes last so that it
 		 * overrides everything above.
 		 */
 		if (replay) begin
 			m_ptr_next = 0;
 			sent_last_next = 0;
 			m_axis_valid_next = 0;
 			m_axis_last_next = 0;
 		end
 	end
 	always @(posedge clk) begin
 		if (we)
 			buffer[s_ptr[BUF_WIDTH - 1:0]] <= { s_axis_data_last };
 		/* m_axis_data is only updated when re is high, so it holds during a stall */
 		if (re)
 			{ m_axis_data } <= buffer[m_ptr[BUF_WIDTH - 1:0]];
 		/* Register the slave inputs for use in the following cycle */
 		s_axis_data_last <= s_axis_data;
 		s_axis_valid_last <= s_axis_valid;
 		s_axis_last_last <= s_axis_last;
 		s_axis_ready <= s_axis_ready_next;
 		m_axis_last <= m_axis_last_next;
 		m_axis_valid <= m_axis_valid_next;
 		sent_last <= sent_last_next;
 		m_ptr <= m_ptr_next;
 		s_ptr <= s_ptr_next;
 		last <= last_next;
 		last_ptr <= last_ptr_next;
 		replayable <= replayable_next;
 	end
`ifndef SYNTHESIS
 	/* This is the only way to look into a buffer... */
 	/* Declares one wire per buffer entry; excluded when SYNTHESIS is defined */
 	genvar i;
 	generate for (i = 0; i < 2 ** BUF_WIDTH; i = i + 1)
 		wire [DATA_WIDTH - 1:0] tmp = buffer[i];
 	endgenerate
`endif
 endmodule
--- a/tb/axis_replay_buffer.py
+++ b/tb/axis_replay_buffer.py
@ -0,0 +1,86 @@
 # SPDX-License-Identifier: AGPL-3.0-Only
 # Copyright (C) 2022 Sean Anderson <seanga2@gmail.com>
 import cocotb
 from cocotb.clock import Clock
 from cocotb.regression import TestFactory
 from cocotb.triggers import ClockCycles, FallingEdge, RisingEdge, Timer
 from .util import ClockEnable, lookahead, timeout
 BUF_SIZE = 54
@timeout(15, 'us')
async def test_replay(buf, in_ratio, out_ratio):
    """Check that the start of each packet can be replayed, then read in full.

    buf is the handle of the buffer under test. in_ratio throttles the
    producer: after each accepted beat it idles for in_ratio - 1 clock cycles.
    out_ratio is passed to ClockEnable, which drives m_axis_ready.
    """
    buf.s_axis_valid.value = 0
    buf.s_axis_last.value = 0
    buf.m_axis_ready.value = 1
    buf.replay.value = 0
    buf.done.value = 0
    await Timer(1)
    # 8 ns clock period
    await cocotb.start(Clock(buf.clk, 8, units='ns').start())
    await FallingEdge(buf.clk)
    await cocotb.start(ClockEnable(buf.clk, buf.m_axis_ready, out_ratio))

    # A packet equal to BUF_SIZE, one around 2**BUF_WIDTH, and one around
    # 2**(BUF_WIDTH + 1) (plus some extra). This should capture most of the fun
    # conditions. We start at different data values to make sure we aren't
    # reusing anything from the last test.
    packets = [list(range(54)), list(range(64, 128)), list(range(128, 512))]

    async def send():
        """Drive every packet into the slave interface, one beat at a time."""
        for packet in packets:
            for val, last in lookahead(packet):
                buf.s_axis_data.value = val
                buf.s_axis_valid.value = 1
                buf.s_axis_last.value = last
                # Hold the beat until s_axis_ready is seen high on a falling edge
                while True:
                    await FallingEdge(buf.clk)
                    if buf.s_axis_ready.value:
                        break
                buf.s_axis_valid.value = 0
                if in_ratio != 1:
                    await ClockCycles(buf.clk, in_ratio - 1, rising=False)

    async def recv(packet):
        """Receive packet from the master interface, replaying it twice first."""
        async def handshake():
            # Wait (on rising edges) until both valid and ready are high
            while not buf.m_axis_valid.value or not buf.m_axis_ready.value:
                await RisingEdge(buf.clk)

        async def recv_len(length):
            # Check the first `length` beats of the packet, data and last flag
            for i, val in enumerate(packet[:length]):
                await handshake()
                assert buf.m_axis_data.value == val
                # Compare the signal's value, not the handle object itself
                assert buf.m_axis_last.value == (i == len(packet) - 1)
                # Step past this beat before looking for the next one
                await RisingEdge(buf.clk)

        async def restart():
            # Pulse replay for one clock; this is only legal while replayable
            await FallingEdge(buf.clk)
            assert buf.replayable.value
            buf.replay.value = 1
            await FallingEdge(buf.clk)
            buf.replay.value = 0

        buf.done.value = 0
        replayable = min(len(packet), BUF_SIZE)
        await recv_len(replayable - 3)
        await restart()
        await recv_len(replayable - 2)
        # As long as the packet is <= BUF_SIZE we should be able to wait
        # Try it out
        if len(packet) <= BUF_SIZE:
            await ClockCycles(buf.clk, 3)
        await restart()
        # No more replays after this; done stays high until the next recv()
        buf.done.value = 1
        await recv_len(len(packet))

    await cocotb.start(send())
    for packet in packets:
        await recv(packet)
 replay_tests = TestFactory(test_replay)
 replay_tests.add_option('in_ratio', (1, 2))
 replay_tests.add_option('out_ratio', (1, 2))
 replay_tests.generate_tests()
--- a/tb/util.py
+++ b/tb/util.py
@ -140,8 +140,20 @@ def compare_lists(ins, outs):
 async def ClockEnable(clk, ce, ratio):
    """Drive ce high for one clk cycle out of every `ratio` cycles.

    ce starts high. With a ratio of 1 it is simply left high and the coroutine
    returns; otherwise the coroutine loops forever, toggling ce.
    """
    ce.value = 1
    if ratio != 1:
        idle_cycles = ratio - 1
        while True:
            # One cycle enabled...
            await ClockCycles(clk, 1, False)
            ce.value = 0
            # ...then the rest of the period disabled
            await ClockCycles(clk, idle_cycles, False)
            ce.value = 1
 # Adapted from https://stackoverflow.com/a/1630350/5086505
 def lookahead(it):
    """Yield (item, is_last) for each item of the iterable `it`.

    is_last is True only for the final item. An empty iterable yields nothing.
    """
    it = iter(it)
    try:
        last = next(it)
    except StopIteration:
        # An uncaught StopIteration inside a generator is turned into a
        # RuntimeError (PEP 479), so finish cleanly on empty input instead.
        return
    # Stay one item behind so we know whether another one follows
    for val in it:
        yield last, False
        last = val
    yield last, True