# Source code for hwtLib.amba.axi_comp.lsu.write_aggregator

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from hwt.code import If, Concat
from hwt.code_utils import rename_signal
from hwt.hdl.constants import READ, WRITE
from hwt.interfaces.utils import addClkRstn, propagateClkRstn
from hwt.serializer.mode import serializeParamsUniq
from hwt.synthesizer.param import Param
from hwt.synthesizer.rtlLevel.rtlSignal import RtlSignal
from hwt.synthesizer.unit import Unit
from hwtLib.amba.axi4 import Axi4
from hwtLib.amba.axi_comp.lsu.fifo_oooread import FifoOutOfOrderReadFiltered
from hwtLib.amba.axi_comp.lsu.interfaces import AxiWriteAggregatorWriteTmpIntf
from hwtLib.amba.axi_comp.lsu.write_aggregator_write_dispatcher import AxiWriteAggregatorWriteDispatcher
from hwtLib.amba.axis_comp.builder import AxiSBuilder
from hwtLib.amba.axis_comp.reg import AxiSReg
from hwtLib.amba.constants import RESP_OKAY
from hwtLib.handshaked.streamNode import StreamNode
from hwtLib.logic.oneHotToBin import oneHotToBin
from hwtLib.mem.ramCumulativeMask import BramPort_withReadMask_withoutClk, \
    RamCumulativeMask, is_mask_byte_unaligned


@serializeParamsUniq
class AxiWriteAggregator(Unit):
    """
    A buffer which is used for write data from cache.

    It manages:

    * out of order write acknowledge
    * write transaction merging

    .. image:: ./_static/AxiWriteAggregator.png

    :ivar ID_WIDTH: a parameter which specifies width of axi id signal,
        it also specifies the number of items in this buffer (2**ID_WIDTH)
    :ivar MAX_BLOCK_DATA_WIDTH: specifies maximum data width of RAM
        (used to prevent synthesis problems for tools which can not handle
        too wide memories with byte enable)

    .. hwt-autodoc:: _example_AxiWriteAggregator
    """

    def _config(self):
        # Inherit all parameters of the dispatcher (ID_WIDTH, ADDR_WIDTH, ...)
        # so this unit and its write dispatcher are configured consistently.
        AxiWriteAggregatorWriteDispatcher._config(self)
        self.MAX_BLOCK_DATA_WIDTH = Param(None)

    def _declr(self):
        # Declare clock/reset and derive cacheline-related constants
        # (CACHE_LINE_OFFSET_BITS, BUS_WORDS_IN_CACHE_LINE, ...) before
        # the sub-components which depend on them are instantiated.
        addClkRstn(self)
        AxiWriteAggregatorWriteDispatcher.precompute_constants(self)
        with self._paramsShared():
            # slave port: incoming writes from the cache (read channel unused)
            self.s = s_axi = Axi4()
            s_axi.HAS_R = False
            # master port: writes flushed to memory (read channel unused)
            self.m = m_axi = Axi4()._m()
            m_axi.HAS_R = False
            self.write_dispatch = AxiWriteAggregatorWriteDispatcher()

        # out-of-order FIFO: one slot per AXI id, keyed by cacheline address
        self.ooo_fifo = of = FifoOutOfOrderReadFiltered()
        of.ITEMS = 2 ** self.ID_WIDTH
        of.KEY_WIDTH = self.CACHE_LINE_ADDR_WIDTH
        self.data_ram = self._declr_data_ram()
[docs] def _declr_data_ram(self): dr = RamCumulativeMask() dr.MAX_BLOCK_DATA_WIDTH = self.MAX_BLOCK_DATA_WIDTH # data bits and mask bits extended so the total DW % 8 == 0 dr.DATA_WIDTH = self.DATA_WIDTH dr.ADDR_WIDTH = self.DATA_RAM_INDEX_WIDTH dr.PORT_CNT = (WRITE, READ) dr.HAS_BE = True return dr
[docs] def _addr_to_index(self, addr: RtlSignal): return addr[:self.CACHE_LINE_OFFSET_BITS]
    def w_in_tmp_reg_load(self) -> AxiWriteAggregatorWriteTmpIntf:
        """
        Capture the incoming write transaction (aw + w) into the temporary
        register ``w_in_reg`` and issue an address pre-lookup into the CAM
        of :obj:`ooo_fifo`.

        * check if this address is already present in address CAM or w_in_reg

        :return: output side of the temporary register with the captured
            transaction and its CAM pre-lookup result
        """
        w_in_aw = self.s.aw
        w_in_w = self.s.w
        # b channel is buffered so the response handshake can not stall
        # the input pipeline combinationally
        w_in_b = AxiSBuilder(self, self.s.b, master_to_slave=False).buff(latency=(1, 2)).end

        ooo_fifo = self.ooo_fifo
        write_pre_lookup = ooo_fifo.write_pre_lookup
        write_pre_lookup_res = ooo_fifo.write_pre_lookup_res
        # query the CAM with the cacheline index of the incoming address
        write_pre_lookup.data(self._addr_to_index(w_in_aw.addr))

        # temporary register holding one bus word of the transaction
        # together with its id/address/CAM metadata
        w_in_reg = AxiSReg(AxiWriteAggregatorWriteTmpIntf)
        w_in_reg.ID_WIDTH = self.ID_WIDTH
        w_in_reg.ADDR_WIDTH = self.CACHE_LINE_ADDR_WIDTH
        w_in_reg.DATA_WIDTH = self.DATA_WIDTH
        w_in_reg.ITEMS = self.ooo_fifo.ITEMS
        self.w_in_reg = w_in_reg

        w_tmp_in: AxiWriteAggregatorWriteTmpIntf = w_in_reg.dataIn
        w_tmp_out: AxiWriteAggregatorWriteTmpIntf = w_in_reg.dataOut

        # if true it means that the current input write data should be merged with
        # a content of the w_tmp register
        colides_with_last_addr_tick = (
            w_tmp_out.valid &
            w_in_aw.valid &
            self._addr_to_index(w_in_aw.addr)._eq(w_tmp_out.addr))

        w_tmp_in.data(w_in_w.data)
        w_tmp_in.strb(w_in_w.strb)
        w_tmp_in.last(w_in_w.last)
        # connections which are only valid while the aw word is present
        # (reused in the first-word branch of the multi-word case below)
        addr_related_inputs = [
            w_tmp_in.id(w_in_aw.id),
            w_tmp_in.addr(self._addr_to_index(w_in_aw.addr)),
            w_tmp_in.colides_with_last_addr(colides_with_last_addr_tick),
            w_tmp_in.cam_lookup(write_pre_lookup_res.data),
            w_tmp_in.mask_byte_unaligned(is_mask_byte_unaligned(w_in_w.strb)),
        ]
        # every write is acknowledged as OKAY; ordering is handled by ooo_fifo
        w_in_b.resp(RESP_OKAY)
        w_in_b.valid(1)
        write_pre_lookup_res.rd(1)

        if self.BUS_WORDS_IN_CACHE_LINE == 1:
            # single bus word per cacheline: aw/w/b are all consumed together
            w_in_b.id(w_in_aw.id)
            sync = StreamNode(
                [w_in_aw, w_in_w],
                [w_tmp_in, write_pre_lookup, w_in_b],
            )
        else:
            # multi-word cacheline: id comes from the registered first word
            w_in_b.id(w_tmp_out.id)
            # flag marking the first data word of a burst
            w_in_first = self._reg("w_first", def_val=1)
            If(w_in_first,
               # new transaction initialization
               * addr_related_inputs,
            ).Else(
               # copy the last values specific for this transaction
               w_tmp_in.id(w_tmp_out.id),
               w_tmp_in.addr(w_tmp_out.addr),
               w_tmp_in.colides_with_last_addr(w_tmp_out.colides_with_last_addr | colides_with_last_addr_tick),
               w_tmp_in.cam_lookup(w_tmp_out.cam_lookup),
               w_tmp_in.mask_byte_unaligned(w_tmp_out.mask_byte_unaligned | is_mask_byte_unaligned(w_in_w.strb)),
            )
            w_in_last = w_in_w.last
            # allow aw and write_pre_lookup only in first word, w_in_b only in last
            sync = StreamNode(
                [w_in_aw, w_in_w],
                [w_tmp_in, write_pre_lookup, w_in_b],
                skipWhen={
                    w_in_aw:~w_in_first,
                    write_pre_lookup:~w_in_first,
                    w_in_b:~w_in_last,
                },
                extraConds={
                    w_in_aw: w_in_first,
                    write_pre_lookup: w_in_first,
                    w_in_b: w_in_last,
                }
            )
            If(sync.ack(),
               w_in_first(w_in_last)
            )
        sync.sync()
        # s_axi_stalling = ~w_in_aw.valid | ~w_in_w.valid | ~w_in_b.ready
        return w_tmp_out
    def resolve_cam_index(self, w_tmp_out: AxiWriteAggregatorWriteTmpIntf):
        """
        Resolve which (if any) already-allocated buffer item matches the
        address of the transaction in the temporary register.

        :param w_tmp_out: output side of the temporary input register
        :return: tuple (cam_found_index, cam_found) - binary index of the
            matching item and a flag that a usable match exists
        """
        ooo_fifo = self.ooo_fifo
        # CAM insert
        # remembers the previous match so a burst which collides with the
        # previous transaction can reuse its resolved slot
        cam_index_onehot_previous = self._reg("cam_index_onehot_previous", w_tmp_out.cam_lookup._dtype)
        # a match only counts if the item is valid and not currently locked
        # for writing out by the dispatcher
        cam_index_onehot = rename_signal(
            self,
            w_tmp_out.colides_with_last_addr._ternary(cam_index_onehot_previous, w_tmp_out.cam_lookup)
            & ooo_fifo.item_valid
            & ~ooo_fifo.item_write_lock,
            "cam_index_onehot")
        cam_found = rename_signal(self, cam_index_onehot != 0, "cam_found")
        cam_found_index = oneHotToBin(self, cam_index_onehot, "cam_found_index")
        # latch the match whenever the tmp register word is consumed
        If(w_tmp_out.valid & w_tmp_out.ready,
           cam_index_onehot_previous(cam_index_onehot)
        )
        return cam_found_index, cam_found
    def data_insert(self, items: BramPort_withReadMask_withoutClk):
        """
        * if it is possible to update data in data_ram of this buffer
        * else allocate new data (insert to address CAM of ooo_fifo)
          and store data to w_in_reg

        .. figure:: ./_static/AxiWriteAggregator_data_insert.png

        :param items: write port of the data RAM holding buffered cachelines
        """
        ooo_fifo = self.ooo_fifo
        w_tmp_out = self.w_in_tmp_reg_load()
        cam_found_index, cam_found = self.resolve_cam_index(w_tmp_out)

        write_execute = ooo_fifo.write_execute
        write_execute.key(w_tmp_out.addr)

        # the slot proposed by ooo_fifo for a fresh allocation must be free
        current_empty = rename_signal(self, ~ooo_fifo.item_valid[write_execute.index], "current_empty")
        # allocate a new item only when no existing item matched
        will_insert_new_item = rename_signal(
            self,
            ~cam_found & current_empty & write_execute.vld,
            "will_insert_new_item")

        # store to tmp register (and accumulate if possible)
        item_insert_last = self._sig("item_insert_last")
        item_insert_first = self._sig("item_insert_first")

        # insert word iteration,
        # push data to items RAM
        if self.BUS_WORDS_IN_CACHE_LINE == 1:
            # a cacheline fits in to a single bus word, no extra care required
            item_insert_last(1)
            item_insert_first(1)
            items.din(w_tmp_out.data)
            items.we(w_tmp_out.strb)
            push_ptr = write_execute.index
        else:
            # iteration over multiple bus words to store a cacheline
            # counter of the bus-word offset within the current cacheline
            push_offset = self._reg("push_offset", self.word_index_t, def_val=0)
            item_write_start = rename_signal(
                self,
                will_insert_new_item | (cam_found & w_tmp_out.valid),
                "item_write_start")
            If(items.en.vld & items.en.rd &  # currently writing to data_ram
               (item_write_start | (push_offset != 0)),
               # continue writing the parts of tmp reg to data_ram
               If(push_offset != self.WORD_OFFSET_MAX,
                  push_offset(push_offset + 1)
               ).Else(
                  push_offset(0)
               )
            )
            item_insert_last(push_offset._eq(self.WORD_OFFSET_MAX))
            item_insert_first(push_offset._eq(0))
            # extend item indexes with the word offset to address the RAM
            cam_found_index = Concat(cam_found_index, push_offset)
            push_ptr = Concat(write_execute.index, push_offset)
            items.din(w_tmp_out.data)
            items.we(w_tmp_out.strb)

        # write either into the matched existing item or the new slot
        If(w_tmp_out.valid & cam_found,
           items.addr(cam_found_index)
        ).Else(
           items.addr(push_ptr)
        )
        # merge with existing bytes when the write is partial or collides
        # with an already-buffered item; otherwise overwrite the whole word
        items.do_accumulate(w_tmp_out.valid & (w_tmp_out.mask_byte_unaligned | (w_tmp_out.colides_with_last_addr & cam_found)))
        items.do_overwrite(w_tmp_out.valid & ~cam_found)

        write_confirm = ooo_fifo.write_confirm
        StreamNode(
            masters=[w_tmp_out, write_execute],
            slaves=[items.en],
            extraConds={
                write_execute: rename_signal(self, will_insert_new_item & item_insert_last, "write_exe_en"),
                items.en: rename_signal(
                    self,
                    (will_insert_new_item | ~item_insert_first) & (write_confirm.rd | cam_found),
                    "items_en_en"),
                w_tmp_out: rename_signal(self, (((write_confirm.rd & current_empty) | cam_found)), "w_tmp_out_en")
            },
            skipWhen={
                write_execute:~will_insert_new_item,
            }
        ).sync()
        # confirm the allocation only once the last word of a new item
        # is accepted by the data RAM
        write_confirm.vld(w_tmp_out.valid & will_insert_new_item & item_insert_last & items.en.rd)
def _impl(self): of = self.ooo_fifo data_ram = self.data_ram self.data_insert( data_ram.port[0] ) wd = self.write_dispatch self.m(wd.m) data_ram.port[1](wd.data) of.read_confirm(wd.read_confirm) wd.read_execute(of.read_execute) propagateClkRstn(self)
def _example_AxiWriteAggregator():
    """Build a small example instance used by documentation and tests."""
    unit = AxiWriteAggregator()
    unit.ID_WIDTH = 2
    unit.CACHE_LINE_SIZE = 4
    unit.DATA_WIDTH = 32
    unit.MAX_BLOCK_DATA_WIDTH = 8
    return unit
if __name__ == "__main__":
    from hwt.synthesizer.utils import to_rtl_str

    # Elaborate the example configuration and dump its RTL.
    # Other parameterizations (e.g. ID_WIDTH=6, CACHE_LINE_SIZE=64,
    # DATA_WIDTH=256 with a Xilinx MAX_BLOCK_DATA_WIDTH limit) can be set
    # on `u` before the to_rtl_str() call.
    u = _example_AxiWriteAggregator()
    print(to_rtl_str(u))