diff --git a/Bender.yml b/Bender.yml index ba293ff..448ca17 100644 --- a/Bender.yml +++ b/Bender.yml @@ -20,9 +20,13 @@ sources: - rtl/tcdm_interconnect/addr_dec_resp_mux.sv - rtl/tcdm_interconnect/amo_shim.sv - rtl/variable_latency_interconnect/addr_decoder.sv + - rtl/variable_latency_interconnect/burst_pkg.sv # Level 1 - rtl/tcdm_interconnect/xbar.sv - rtl/variable_latency_interconnect/simplex_xbar.sv + - rtl/variable_latency_interconnect/burst_cutter.sv + - rtl/variable_latency_interconnect/burst_manager.sv + - rtl/variable_latency_interconnect/burst_req_grouper.sv # Level 2 - rtl/tcdm_interconnect/clos_net.sv - rtl/tcdm_interconnect/bfly_net.sv @@ -32,6 +36,7 @@ sources: - rtl/variable_latency_interconnect/variable_latency_bfly_net.sv # Level 4 - rtl/variable_latency_interconnect/variable_latency_interconnect.sv + - rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv # Low-Latency Interco - rtl/low_latency_interco/FanInPrimitive_Req.sv diff --git a/rtl/variable_latency_interconnect/burst_cutter.sv b/rtl/variable_latency_interconnect/burst_cutter.sv new file mode 100644 index 0000000..987c6bd --- /dev/null +++ b/rtl/variable_latency_interconnect/burst_cutter.sv @@ -0,0 +1,179 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Author: Diyou Shen ETH Zurich +// Author: Marco Bertuletti ETH Zurich + +/// Burst Cutter: +/// Divides the burst request from NumIn initiators in multiple bursts when it +/// crosses the address boundary in the target multi-banked Memory. 
+
+// burst_cutter
+//
+// Sits on a single request channel. Non-burst requests, write requests and
+// read bursts that fit in the remaining banks are forwarded combinationally.
+// A valid read burst whose length exceeds `NumBanks - bank_offset` is split:
+//   1. Bypass   : the first part (blen = max_blen) is presented downstream
+//                 while the upstream request is stalled (req_ready_o = 0).
+//   2. BurstCut : the stored second part (blen = remaining_len) is presented
+//                 and, once accepted, the upstream request is acknowledged.
+// Write bursts are not supported; their burst field is cleared.
+module burst_cutter
+  import burst_pkg::burst_t;
+#(
+  parameter int unsigned NumIn        = 32,
+  parameter int unsigned NumOut       = 64,
+  parameter int unsigned AddrWidth    = 32,
+  parameter int unsigned DataWidth    = 32,
+  parameter int unsigned BeWidth      = DataWidth/8,
+  // Number of Address bits per Target
+  parameter int unsigned AddrMemWidth = 12,
+  // Determines the width of the byte offset in a memory word. Normally this can be left at the default value,
+  // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal).
+  parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3,
+  // Dependant parameters. DO NOT CHANGE!
+  parameter int unsigned NumInLog2    = (NumIn == 1) ? 1 : $clog2(NumIn)
+) (
+  input  logic                                 clk_i,
+  input  logic                                 rst_ni,
+  // Memory Request In
+  input  logic [NumInLog2-1:0]                 req_ini_addr_i, // Initiator address
+  input  logic [AddrWidth-1:0]                 req_tgt_addr_i, // Target address
+  input  logic                                 req_wen_i,      // Write enable
+  input  logic [NumIn-1:0][DataWidth-1:0]      req_wdata_i,    // Write data
+  input  logic [BeWidth-1:0]                   req_be_i,       // Byte enable
+  input  burst_t                               req_burst_i,    // Burst data
+  input  logic                                 req_valid_i,
+  output logic                                 req_ready_o,
+  // Memory Request Out
+  output logic [NumInLog2-1:0]                 req_ini_addr_o, // Initiator address
+  output logic [AddrWidth-1:0]                 req_tgt_addr_o, // Target address
+  output logic                                 req_wen_o,      // Write enable
+  output logic [DataWidth-1:0]                 req_wdata_o,    // Write data
+  output logic [BeWidth-1:0]                   req_be_o,       // Byte enable
+  output burst_t                               req_burst_o,    // Burst data
+  output logic                                 req_valid_o,
+  input  logic                                 req_ready_i
+);
+
+  `include "common_cells/registers.svh"
+
+  localparam int unsigned BurstLen       = NumIn;
+  localparam int unsigned BurstLenWidth  = NumInLog2;
+  localparam int unsigned NumBanks       = NumOut;
+  localparam int unsigned BankOffsetBits = AddrMemWidth - ByteOffWidth;
+
+  typedef enum logic {
+    Bypass,  // normal requests, first cut of burst
+    BurstCut // second cut of burst
+  } burst_cutter_fsm_e;
+
+  // Word offset taken from the target address, zero-extended to 32 bit
+  logic [31:0] bank_offset;
+  // Number of words that still fit before NumBanks is reached
+  logic [31:0] max_blen;
+  // Words left over for the second cut (0 when the burst fits)
+  logic [31:0] remaining_len;
+  assign bank_offset   = {{(32-BankOffsetBits){1'b0}}, req_tgt_addr_i[AddrMemWidth-1 : ByteOffWidth]};
+  assign max_blen      = NumBanks - bank_offset;
+  assign remaining_len = {{(32-BurstLenWidth){1'b0}}, req_burst_i.blen} > max_blen ?
+                         {{(32-BurstLenWidth){1'b0}}, req_burst_i.blen} - max_blen : '0;
+
+  // FSM state
+  burst_cutter_fsm_e state_d, state_q;
+
+  // FSM stored signals (second cut of the burst)
+  logic [NumInLog2-1:0] cut_ini_addr_d, cut_ini_addr_q;
+  logic [AddrWidth-1:0] cut_tgt_addr_d, cut_tgt_addr_q;
+  logic [DataWidth-1:0] cut_wdata_d,    cut_wdata_q;
+  burst_t               cut_burst_d,    cut_burst_q;
+
+  // Store FSM state and signals
+  `FF(state_q,        state_d,        Bypass, clk_i, rst_ni);
+  `FF(cut_burst_q,    cut_burst_d,    '0,     clk_i, rst_ni);
+  `FF(cut_ini_addr_q, cut_ini_addr_d, '0,     clk_i, rst_ni);
+  `FF(cut_tgt_addr_q, cut_tgt_addr_d, '0,     clk_i, rst_ni);
+  `FF(cut_wdata_q,    cut_wdata_d,    '0,     clk_i, rst_ni);
+
+  always_comb begin
+
+    // FSM defaults
+    state_d        = state_q;
+    cut_burst_d    = cut_burst_q;
+    cut_tgt_addr_d = cut_tgt_addr_q;
+    cut_ini_addr_d = cut_ini_addr_q;
+    cut_wdata_d    = cut_wdata_q;
+
+    // Output defaults: bypass the request. Driving every output before the
+    // case statement keeps the block latch-free on all paths, `default` included.
+    req_ini_addr_o = req_ini_addr_i;
+    req_tgt_addr_o = req_tgt_addr_i;
+    req_wdata_o    = req_wdata_i[0];
+    req_wen_o      = req_wen_i;
+    req_be_o       = req_be_i;
+    req_burst_o    = req_burst_i;
+    req_valid_o    = req_valid_i;
+    req_ready_o    = req_ready_i;
+
+    case (state_q)
+
+      Bypass: begin
+
+        // Only a *valid* burst request is inspected, so that an idle input
+        // with stale burst bits can never start a cut.
+        if (req_valid_i && req_burst_i.isburst) begin
+
+          if (req_wen_i) begin
+            // No support for write burst, tie to 0
+            req_burst_o = '0;
+
+          end else if (remaining_len > 0) begin
+            // Cut burst when it is longer than the max length
+
+            // Send out the first burst
+            req_burst_o.isburst = 1'b1;
+            req_burst_o.blen    = max_blen;
+            req_burst_o.gdata   = '0;
+
+            // Store the info for next burst
+            // NOTE(review): the initiator address is advanced by the same
+            // byte-scaled amount as the target address; an element count
+            // (max_blen) looks more plausible here - confirm against the
+            // consumer of req_ini_addr_o.
+            cut_ini_addr_d      = req_ini_addr_i + (max_blen << ByteOffWidth);
+            cut_tgt_addr_d      = req_tgt_addr_i + (max_blen << ByteOffWidth);
+            cut_wdata_d         = req_wdata_i[max_blen];
+            cut_burst_d.isburst = 1'b1;
+            cut_burst_d.blen    = remaining_len[BurstLenWidth-1:0];
+            cut_burst_d.gdata   = '0;
+
+            // Pause taking in new requests
+            req_ready_o = 1'b0;
+            // Keep state until the current one is picked
+            if (req_ready_i) begin
+              state_d = BurstCut;
+            end
+          end
+
+        end
+
+      end
+
+      BurstCut: begin
+
+        // Send out second half and wait for ready
+        req_ini_addr_o = cut_ini_addr_q;
+        req_tgt_addr_o = cut_tgt_addr_q;
+        req_wdata_o    = cut_wdata_q;
+        req_wen_o      = '0; // only read burst is supported
+        req_be_o       = '0;
+        req_burst_o    = cut_burst_q;
+        req_valid_o    = 1'b1;
+        req_ready_o    = 1'b0;
+
+        // When we get the ready, the second part is out: acknowledge the
+        // stalled upstream request and return to bypass mode.
+        if (req_ready_i) begin
+          req_ready_o = req_ready_i;
+          state_d     = Bypass;
+        end
+
+      end
+
+      default: state_d = Bypass;
+    endcase
+  end
+
+  /******************
+   *   Assertions   *
+   ******************/
+
+  if (BurstLen > NumBanks)
+    $fatal(1, "Only one cut is supported, reduce the burst length.");
+
+endmodule : burst_cutter
diff --git a/rtl/variable_latency_interconnect/burst_manager.sv b/rtl/variable_latency_interconnect/burst_manager.sv
new file mode 100644
index 0000000..0f61491
--- /dev/null
+++ b/rtl/variable_latency_interconnect/burst_manager.sv
@@ -0,0 +1,401 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Author: Diyou Shen ETH Zurich
+// Author: Marco Bertuletti ETH Zurich
+
+/// Burst Req Manager:
+/// Receives a burst request from NumIn initiators and produces a parallel request
+/// to NumIn target banks in a target multi-banked memory with NumOut banks.
+/// Collects a parallel response from NumOut banks in a target multi-banked memory
+/// and groups them according to the RspGF.
+
+// burst_manager
+//
+// Request path : grouped writes (ReqGF) are expanded back onto ReqGF
+//                neighbouring bank ports. Read bursts are removed from the
+//                bypass path, arbitrated round-robin, queued in a fall-through
+//                FIFO and replayed as one parallel request on `blen`
+//                consecutive bank ports, starting at the port the burst
+//                arrived on.
+// Response path: when RspGF > 1, the responses of banks reserved by a burst
+//                are merged group-wise into the first port of each group;
+//                everything else is bypassed.
+module burst_manager
+  import burst_pkg::*;
+#(
+  parameter int unsigned NumIn        = 32, // number of initiator ports
+  parameter int unsigned NumOut       = 64, // number of destination ports
+  parameter int unsigned AddrWidth    = 32,
+  parameter int unsigned DataWidth    = 32,
+  parameter int unsigned BeWidth      = DataWidth/8,
+  // Determines the width of the byte offset in a memory word. Normally this can be left at the default value,
+  // but sometimes it needs to be overridden (e.g. when meta-data is supplied to the memory via the wdata signal).
+  parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3,
+  // Group Request Extension Grouping Factor for TCDM
+  parameter int unsigned ReqGF        = 1,
+  // Group Response Extension Grouping Factor for TCDM
+  parameter int unsigned RspGF        = 1,
+  // Datawidth of words grouped in the burst
+  parameter int unsigned GroupedDW    = burst_pkg::GroupedDW,
+  // Dependant parameters. DO NOT CHANGE!
+  parameter int unsigned NumInLog2    = (NumIn  > 32'd1) ? unsigned'($clog2(NumIn))  : 32'd1,
+  parameter int unsigned NumOutLog2   = (NumOut > 32'd1) ? unsigned'($clog2(NumOut)) : 32'd1
+) (
+  input  logic                                  clk_i,
+  input  logic                                  rst_ni,
+
+  /// Xbar side
+  input  logic         [NumOut-1:0][NumInLog2-1:0] req_ini_addr_i,
+  input  logic         [NumOut-1:0][AddrWidth-1:0] req_tgt_addr_i,
+  input  logic         [NumOut-1:0][DataWidth-1:0] req_wdata_i,
+  input  logic         [NumOut-1:0]                req_wen_i,
+  input  logic         [NumOut-1:0][BeWidth-1:0]   req_be_i,
+  input  burst_t       [NumOut-1:0]                req_burst_i,
+  input  logic         [NumOut-1:0]                req_valid_i,
+  output logic         [NumOut-1:0]                req_ready_o,
+  output logic         [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_o,
+  output logic         [NumOut-1:0][DataWidth-1:0] resp_rdata_o,
+  output burst_gresp_t [NumOut-1:0]                resp_burst_o,
+  output logic         [NumOut-1:0]                resp_valid_o,
+  input  logic         [NumOut-1:0]                resp_ready_i,
+
+  /// Bank side
+  output logic         [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o,
+  output logic         [NumOut-1:0][AddrWidth-1:0] req_tgt_addr_o,
+  output logic         [NumOut-1:0][DataWidth-1:0] req_wdata_o,
+  output logic         [NumOut-1:0]                req_wen_o,
+  output logic         [NumOut-1:0][BeWidth-1:0]   req_be_o,
+  output logic         [NumOut-1:0]                req_valid_o,
+  input  logic         [NumOut-1:0]                req_ready_i,
+  input  logic         [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i,
+  input  logic         [NumOut-1:0][DataWidth-1:0] resp_rdata_i,
+  input  logic         [NumOut-1:0]                resp_valid_i,
+  output logic         [NumOut-1:0]                resp_ready_o
+);
+  /*************************************************************
+   * req_i --+--> arbiter --> fifo --> req generator --> req_o *
+   *          \--------------- bypass ------------------> req_o *
+   * rsp_o <----- data_grouper <----- rsp_i                    *
+   *************************************************************/
+
+  // Include FF module
+  `include "common_cells/registers.svh"
+
+  // Number of port groups. A grouping factor of 1 means "no grouping";
+  // the guard matches the one used in burst_req_grouper.
+  localparam int unsigned NumGroupReq = ReqGF > 1 ? NumOut >> $clog2(ReqGF) : NumOut;
+  localparam int unsigned NumGroupRsp = RspGF > 1 ? NumOut >> $clog2(RspGF) : NumOut;
+
+  // Payload arbitrated between the bank ports
+  typedef struct packed {
+    logic [NumInLog2-1:0] ini_addr;
+    logic [AddrWidth-1:0] tgt_addr;
+    logic [DataWidth-1:0] wdata;
+    logic                 wen;
+    logic [BeWidth-1:0]   ben;
+    burst_t               burst;
+  } arb_data_t;
+
+  // Arbitrated payload plus the index of the port it arrived on
+  typedef struct packed {
+    logic [NumInLog2-1:0]  ini_addr;
+    logic [AddrWidth-1:0]  tgt_addr;
+    logic [DataWidth-1:0]  wdata;
+    logic                  wen;
+    logic [BeWidth-1:0]    ben;
+    burst_t                burst;
+    logic [NumOutLog2-1:0] idx;
+  } fifo_data_t;
+
+  logic   [NumOut-1:0][NumInLog2-1:0] req_ini_addr;
+  logic   [NumOut-1:0][AddrWidth-1:0] req_tgt_addr;
+  logic   [NumOut-1:0][DataWidth-1:0] req_wdata;
+  logic   [NumOut-1:0]                req_wen;
+  logic   [NumOut-1:0][BeWidth-1:0]   req_be;
+  burst_t [NumOut-1:0]                req_burst;
+  logic   [NumOut-1:0]                req_valid_mask;
+  logic   [NumOut-1:0]                resp_valid_mask;
+
+  arb_data_t [NumOut-1:0]   prearb_data;
+  logic      [NumOut-1:0]   prearb_valid, prearb_ready;
+  arb_data_t                postarb_data;
+  logic                     postarb_valid, postarb_ready;
+  logic [NumOutLog2-1:0]    postarb_idx;
+
+  fifo_data_t fifo_data, pre_fifo_data;
+  logic       fifo_pop, fifo_empty, fifo_full, fifo_push;
+
+  always_comb begin
+
+    req_ini_addr   = req_ini_addr_i;
+    req_tgt_addr   = req_tgt_addr_i;
+    req_wdata      = req_wdata_i;
+    req_wen        = req_wen_i;
+    req_be         = req_be_i;
+    req_burst      = req_burst_i;
+    req_valid_mask = req_valid_i;
+
+    prearb_data  = '0;
+    prearb_valid = '0;
+
+    /***************
+     * Burst WRITE *
+     ***************/
+
+    // Redistribute grouped write requests: port i*ReqGF carries the request,
+    // ports i*ReqGF+j receive a copy with incremented addresses and the
+    // j-th grouped data word in the low GroupedDW bits.
+    for (int i = 0; i < NumGroupReq; i++) begin
+      for (int j = 1; j < ReqGF; j++) begin
+        if (req_burst_i[i*ReqGF].isburst && req_wen_i[i*ReqGF]) begin
+          req_ini_addr[i*ReqGF+j]                     = req_ini_addr_i[i*ReqGF] + j;
+          req_tgt_addr[i*ReqGF+j]                     = req_tgt_addr_i[i*ReqGF] + j;
+          req_wdata[i*ReqGF+j][DataWidth-1:GroupedDW] = req_wdata_i[i*ReqGF][DataWidth-1:GroupedDW];
+          req_wdata[i*ReqGF+j][GroupedDW-1:0]         = req_burst_i[i*ReqGF].gdata[j-1];
+          req_wen[i*ReqGF+j]                          = req_wen_i[i*ReqGF];
+          req_be[i*ReqGF+j]                           = req_be_i[i*ReqGF];
+          req_burst[i*ReqGF+j]                        = '0;
+          req_valid_mask[i*ReqGF+j]                   = req_valid_i[i*ReqGF];
+        end
+      end
+    end
+
+    /**************
+     * Burst READ *
+     **************/
+
+    // Detect and save read bursts
+    for (int unsigned i = 0; i < NumOut; i++) begin
+      if (req_burst_i[i].isburst && !req_wen_i[i]) begin
+        prearb_data[i].ini_addr = req_ini_addr_i[i];
+        prearb_data[i].tgt_addr = req_tgt_addr_i[i];
+        prearb_data[i].wdata    = req_wdata_i[i];
+        prearb_data[i].wen      = req_wen_i[i];
+        prearb_data[i].ben      = req_be_i[i];
+        prearb_data[i].burst    = req_burst_i[i];
+        prearb_valid[i]         = req_valid_i[i];
+        // Remove valid from bank request
+        req_valid_mask[i] = 1'b0;
+      end
+    end
+
+  end
+
+  // Round-Robin arbiter on incoming bursts
+  rr_arb_tree #(
+    .NumIn     ( NumOut     ),
+    .DataType  ( arb_data_t ),
+    .ExtPrio   ( 1'b0       ),
+    .AxiVldRdy ( 1'b1       ),
+    .LockIn    ( 1'b1       )
+  ) i_rr_arb_tree (
+    .clk_i   ( clk_i         ),
+    .rst_ni  ( rst_ni        ),
+    .flush_i ( 1'b0          ),
+    .rr_i    ( '0            ),
+    .req_i   ( prearb_valid  ),
+    .gnt_o   ( prearb_ready  ),
+    .data_i  ( prearb_data   ),
+    .req_o   ( postarb_valid ),
+    .gnt_i   ( postarb_ready ),
+    .data_o  ( postarb_data  ),
+    .idx_o   ( postarb_idx   )
+  );
+
+  assign postarb_ready          = fifo_full ? 1'b0 : 1'b1;
+  assign pre_fifo_data.ini_addr = postarb_data.ini_addr;
+  assign pre_fifo_data.tgt_addr = postarb_data.tgt_addr;
+  assign pre_fifo_data.wdata    = postarb_data.wdata;
+  assign pre_fifo_data.wen      = postarb_data.wen;
+  assign pre_fifo_data.ben      = postarb_data.ben;
+  assign pre_fifo_data.burst    = postarb_data.burst;
+  assign pre_fifo_data.idx      = postarb_idx;
+
+  // Push when FIFO is not full and data is valid
+  assign fifo_push = postarb_valid & (~fifo_full);
+
+  // Fall-through FIFO to store bursts
+  fifo_v3 #(
+    .FALL_THROUGH ( 1'b1        ),
+    .DEPTH        ( NumOut      ),
+    .dtype        ( fifo_data_t )
+  ) i_fall_though_fifo (
+    .clk_i      ( clk_i         ),
+    .rst_ni     ( rst_ni        ),
+    .flush_i    ( 1'b0          ),
+    .testmode_i ( 1'b0          ),
+    .full_o     ( fifo_full     ),
+    .empty_o    ( fifo_empty    ),
+    .usage_o    ( /*not used */ ),
+    .data_i     ( pre_fifo_data ),
+    .push_i     ( fifo_push     ),
+    .data_o     ( fifo_data     ),
+    .pop_i      ( fifo_pop      )
+  );
+
+  /*********************
+   * Request Generator *
+   *********************/
+
+  typedef enum logic {
+    Idle,   // idle until burst request comes
+    DoBurst // generate parallel requests when ready
+  } req_gen_fsm_e;
+
+  // FSM state & signals
+  req_gen_fsm_e state_d, state_q;
+  fifo_data_t   req_d, req_q;
+
+  // Indicates which req inputs are involved in a burst
+  logic [NumOut-1:0] burst_req_mask_d, burst_req_mask_q;
+  // Indicates which resp inputs are involved in a burst
+  logic [NumOut-1:0] burst_resp_mask_d, burst_resp_mask_q;
+  // indicates if there is pending req/resp to be picked
+  logic pending_req, pending_rsp;
+
+  // Store FSM state and signals
+  `FF(state_q, state_d, Idle, clk_i, rst_ni);
+  `FF(req_q, req_d, '0, clk_i, rst_ni);
+  `FF(burst_req_mask_q, burst_req_mask_d, '0, clk_i, rst_ni);
+  `FF(burst_resp_mask_q, burst_resp_mask_d, '0, clk_i, rst_ni);
+
+  // Mask with burst-length ones, shifted to the port the burst arrived on.
+  // It always follows the FIFO head, so burst_req_mask_q holds the mask of
+  // the burst popped in the previous cycle while the FSM is in DoBurst.
+  assign burst_req_mask_d = ((1'b1 << fifo_data.burst.blen) - 1'b1) << fifo_data.idx;
+
+  always_comb begin : request_generator
+
+    // FSM defaults
+    state_d = state_q;
+    req_d   = req_q;
+
+    // Do not take in next burst for now
+    fifo_pop    = 1'b0;
+    pending_req = 1'b0;
+    pending_rsp = 1'b0;
+
+    // Bypass all requests by default
+    req_wdata_o    = req_wdata;
+    req_ini_addr_o = req_ini_addr;
+    req_tgt_addr_o = req_tgt_addr;
+    req_wen_o      = req_wen;
+    req_be_o       = req_be;
+    req_valid_o    = '0;
+    req_ready_o    = '0;
+
+    case (state_q)
+
+      // Idle state, ready to take in burst request
+      Idle: begin
+
+        // Let valid requests not in burst pass
+        req_valid_o = req_valid_mask;
+        req_ready_o = (req_valid_mask & req_ready_i) | (prearb_valid & prearb_ready);
+
+        // Check if there is a req/resp on the affected banks
+        pending_req = |(req_valid_mask & burst_req_mask_d);
+        pending_rsp = |(resp_valid_mask & burst_req_mask_d);
+
+        // Start pending burst
+        if (!fifo_empty && !pending_req && !pending_rsp) begin
+          fifo_pop = 1'b1;
+          req_d    = fifo_data;
+          state_d  = DoBurst;
+        end
+
+      end
+
+      DoBurst: begin
+
+        // Let valid requests not in burst pass
+        req_valid_o = req_valid_mask & ~burst_req_mask_q;
+        req_ready_o = ((req_valid_mask & req_ready_i) & ~burst_req_mask_q) | (prearb_valid & prearb_ready);
+
+        // Write the request on burst banks.
+        // NOTE(review): the burst is asserted for exactly one cycle and
+        // req_ready_i of the burst banks is not consulted here - this
+        // presumably relies on the banks always accepting; confirm.
+        for (int unsigned i = 0; i < NumOut; i++) begin
+          if (burst_req_mask_q[i]) begin
+            req_wdata_o[i]    = req_q.wdata;
+            req_tgt_addr_o[i] = i + req_q.tgt_addr - req_q.idx;
+            req_ini_addr_o[i] = i + req_q.ini_addr - req_q.idx;
+            req_wen_o[i]      = req_q.wen;
+            req_be_o[i]       = req_q.ben;
+            req_valid_o[i]    = 1'b1;
+          end
+        end
+
+        state_d = Idle;
+
+      end
+
+      default: state_d = Idle;
+    endcase
+  end
+
+  /***********************
+   *  Response Handling  *
+   ***********************/
+
+  assign resp_valid_o = resp_valid_mask;
+
+  if (RspGF == 1) begin : gen_grouper_bypass
+
+    // Bypass all responses if no grouping
+    assign resp_valid_mask = resp_valid_i;
+    assign resp_ready_o    = resp_ready_i;
+    assign resp_rdata_o    = resp_rdata_i;
+    assign resp_ini_addr_o = resp_ini_addr_i;
+    assign resp_burst_o    = '0;
+
+    // No response port is ever reserved without grouping. Drive the
+    // register input so that burst_resp_mask_q stays at its reset value
+    // instead of sampling an undriven signal.
+    assign burst_resp_mask_d = '0;
+
+  end else begin : gen_grouper
+
+    logic         [NumOut-1:0][NumInLog2-1:0] resp_ini_addr;
+    logic         [NumOut-1:0][DataWidth-1:0] resp_rdata;
+    burst_gresp_t [NumOut-1:0]                resp_burst;
+    logic         [NumOut-1:0]                resp_valid;
+    logic         [NumOut-1:0]                resp_ready;
+
+    // Mark the ports requested in burst until response is sent
+    always_comb begin
+      // Hold by default; also covers ports outside of a complete group
+      burst_resp_mask_d = burst_resp_mask_q;
+
+      for (int i = 0; i < NumGroupRsp; i ++) begin
+
+        // If ready cancel the reservation
+        if (burst_resp_mask_q[i*RspGF] && resp_valid[i*RspGF] && resp_ready_i[i*RspGF]) begin
+          burst_resp_mask_d[i*RspGF+:RspGF] = '0;
+        end
+        // If new burst mark the affected banks
+        if (state_q == DoBurst) begin
+          burst_resp_mask_d[i*RspGF+:RspGF] = burst_resp_mask_d[i*RspGF+:RspGF] | burst_req_mask_q[i*RspGF+:RspGF];
+        end
+
+      end
+    end
+
+    // Assign input data to grouped response
+    always_comb begin
+      // Defaults: every element is driven on every path. Without these the
+      // gdata field of the non-leading ports (and any port outside of a
+      // complete group) is never assigned and a latch is inferred.
+      resp_ini_addr = '0;
+      resp_rdata    = '0;
+      resp_burst    = '0;
+      resp_valid    = '0;
+      resp_ready    = '0;
+
+      for (int i = 0; i < NumGroupRsp; i++) begin
+
+        // The leading port of the group carries the grouped response and is
+        // valid only once all RspGF banks of the group have responded.
+        resp_ini_addr[i*RspGF]      = resp_ini_addr_i[i*RspGF];
+        resp_rdata[i*RspGF]         = resp_rdata_i[i*RspGF];
+        resp_burst[i*RspGF].isburst = &resp_valid_i[i*RspGF+:RspGF];
+        resp_valid[i*RspGF]         = &resp_valid_i[i*RspGF+:RspGF];
+        resp_ready[i*RspGF]         = resp_valid_o[i*RspGF] && resp_ready_i[i*RspGF];
+
+        for (int j = 1; j < RspGF; j++) begin
+          resp_ini_addr[i*RspGF+j]       = '0;
+          resp_rdata[i*RspGF+j]          = '0;
+          resp_burst[i*RspGF].gdata[j-1] = resp_rdata_i[i*RspGF+j][GroupedDW-1:0];
+          resp_burst[i*RspGF+j].isburst  = 1'b0;
+          resp_valid[i*RspGF+j]          = 1'b0;
+          // grouped response is ready if the i*RspGF'th output handshakes
+          resp_ready[i*RspGF+j]          = resp_valid_o[i*RspGF] && resp_ready_i[i*RspGF];
+        end
+
+      end
+    end
+
+    // Assign outputs: grouped view for reserved ports, bypass otherwise
+    for (genvar i = 0; i < NumOut; i++) begin
+      assign resp_ini_addr_o[i]      = burst_resp_mask_q[i] ? resp_ini_addr[i]      : resp_ini_addr_i[i];
+      assign resp_rdata_o[i]         = burst_resp_mask_q[i] ? resp_rdata[i]         : resp_rdata_i[i];
+      assign resp_burst_o[i].gdata   = burst_resp_mask_q[i] ? resp_burst[i].gdata   : '0;
+      assign resp_burst_o[i].isburst = burst_resp_mask_q[i] ? resp_burst[i].isburst : 1'b0;
+      assign resp_valid_mask[i]      = burst_resp_mask_q[i] ? resp_valid[i]         : resp_valid_i[i];
+      assign resp_ready_o[i]         = burst_resp_mask_q[i] ? resp_ready[i]         : (resp_valid_mask[i] && resp_ready_i[i]);
+    end
+  end
+
+  /******************
+   *   Assertions   *
+   ******************/
+  if (NumOut == 0)
+    $error("[burst_manager] NumBanks needs to be greater or equal to 1");
+
+  if (NumOut < RspGF)
+    $error("[burst_manager] NumBanks needs to be larger or equal to RspGF");
+
+endmodule : burst_manager
diff --git a/rtl/variable_latency_interconnect/burst_pkg.sv b/rtl/variable_latency_interconnect/burst_pkg.sv
new file mode 100644
index 0000000..074f242
--- /dev/null
+++ b/rtl/variable_latency_interconnect/burst_pkg.sv
@@ -0,0 +1,51 @@
+// Copyright 2024 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Diyou Shen, ETH Zurich
+// Author: Marco Bertuletti ETH Zurich
+
+// Description
+// Include TCDM burst types and grouped response types
+
+package burst_pkg;
+  /********************
+   * BURST PARAMETERS *
+   ********************/
+
+  // Memory read requests are bursted
+  localparam bit UseBurst = `ifdef USE_BURST `USE_BURST `else 0 `endif;
+
+  // Maximum length of the issued burst
+  localparam integer unsigned BurstLen = `ifdef BURSTLEN `BURSTLEN `else 1 `endif;
+  // NOTE(review): with BurstLen == 1 this evaluates to 0, so the
+  // `logic [BurstLenWidth-1:0] blen` field of burst_t is declared as [-1:0]
+  // - confirm that this is intended.
+  parameter int unsigned BurstLenWidth = BurstLen == 1 ? 0 : $clog2(BurstLen)+1;
+
+  // Grouped request in bursted writes
+  localparam integer unsigned ReqGF = `ifdef GROUP_REQ `GROUP_REQ `else 1 `endif;
+  localparam integer unsigned GroupedDW = 32;
+  localparam int ReqBurstMSB = (ReqGF > 1) ?
(ReqGF - 2) : 0;
+
+  // Number of cuts if a burst crosses the target memory boundary
+  localparam integer unsigned NumCuts = 1;
+
+  typedef struct packed {
+    logic isburst;
+    logic [BurstLenWidth-1:0] blen;
+    logic [ReqBurstMSB:0][GroupedDW-1:0] gdata;
+  } burst_t;
+
+  /********************************
+   * Burst Grouped Rsp PARAMETERS *
+   ********************************/
+
+  // Grouping Factor of response data
+  localparam integer unsigned RspGF = `ifdef GROUP_RSP `GROUP_RSP `else 1 `endif;
+  localparam int RspBurstMSB = (RspGF > 1) ? (RspGF - 2) : 0;
+
+  // Add this to rdata payload when the response is grouped
+  typedef struct packed {
+    logic isburst;
+    logic [RspBurstMSB:0][GroupedDW-1:0] gdata;
+  } burst_gresp_t;
+
+endpackage : burst_pkg
diff --git a/rtl/variable_latency_interconnect/burst_req_grouper.sv b/rtl/variable_latency_interconnect/burst_req_grouper.sv
new file mode 100644
index 0000000..a3b9ab8
--- /dev/null
+++ b/rtl/variable_latency_interconnect/burst_req_grouper.sv
@@ -0,0 +1,272 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Author: Marco Bertuletti ETH Zurich
+
+/// Burst Req Grouper:
+/// Packs a parallel memory request from NumIn initiators in a burst request.
+/// The burst cutter creates multiple burst requests when the burst request crosses
+/// the boundary in the target multi-banked memory.
+
+// Request path : - writes: ReqGF neighbouring, all-valid write requests to
+//                  consecutive word addresses are packed onto the first port
+//                  of the group, with the other data words carried in
+//                  req_burst_o.gdata.
+//                - reads : when all NumIn ports issue valid reads to
+//                  consecutive word addresses, a single burst is sent on
+//                  port 0 (through the burst cutter) and all other ports
+//                  are silenced.
+// Response path: a grouped response on port ii*RspGF is fanned out again to
+//                the RspGF ports of its group.
+module burst_req_grouper
+  import burst_pkg::burst_t;
+  import burst_pkg::burst_gresp_t;
+#(
+  parameter int unsigned NumIn        = 32,
+  parameter int unsigned NumOut       = 32,
+  parameter int unsigned AddrWidth    = 32,
+  parameter int unsigned DataWidth    = 32,
+  parameter int unsigned BeWidth      = DataWidth/8,
+  // Number of Address bits per Target
+  parameter int unsigned AddrMemWidth = 12,
+  // Determines the width of the byte offset in a memory word. Normally this can be left at the default value,
+  // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal).
+  parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3,
+  // Group Request Extension Grouping Factor for TCDM
+  parameter int unsigned ReqGF        = 1,
+  // Group Response Extension Grouping Factor for TCDM
+  parameter int unsigned RspGF        = 1,
+  // Datawidth of words grouped in the burst
+  parameter int unsigned GroupedDW    = burst_pkg::GroupedDW,
+  // Dependant parameters. DO NOT CHANGE!
+  parameter int unsigned NumInLog2    = NumIn == 1 ? 1 : $clog2(NumIn)
+)(
+  input  logic                                 clk_i,
+  input  logic                                 rst_ni,
+  // Parallel input request port
+  input  logic         [NumIn-1:0][NumInLog2-1:0] req_ini_addr_i, // Initiator address
+  input  logic         [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address
+  input  logic         [NumIn-1:0][DataWidth-1:0] req_wdata_i,
+  input  logic         [NumIn-1:0]                req_wen_i,
+  input  logic         [NumIn-1:0][BeWidth-1:0]   req_be_i,
+  input  logic         [NumIn-1:0]                req_valid_i,
+  output logic         [NumIn-1:0]                req_ready_o,
+  // Burst output request port
+  output logic         [NumIn-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address
+  output logic         [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_o, // Target address
+  output logic         [NumIn-1:0][DataWidth-1:0] req_wdata_o,
+  output logic         [NumIn-1:0]                req_wen_o,
+  output logic         [NumIn-1:0][BeWidth-1:0]   req_be_o,
+  output burst_t       [NumIn-1:0]                req_burst_o,
+  output logic         [NumIn-1:0]                req_valid_o,
+  input  logic         [NumIn-1:0]                req_ready_i,
+  // Response out
+  output logic         [NumIn-1:0][NumInLog2-1:0] resp_ini_addr_o,
+  output logic         [NumIn-1:0][DataWidth-1:0] resp_rdata_o,
+  output logic         [NumIn-1:0]                resp_valid_o,
+  input  logic         [NumIn-1:0]                resp_ready_i,
+  // Response in
+  input  logic         [NumIn-1:0][NumInLog2-1:0] resp_ini_addr_i,
+  input  logic         [NumIn-1:0][DataWidth-1:0] resp_rdata_i,
+  input  burst_gresp_t [NumIn-1:0]                resp_burst_i,
+  input  logic         [NumIn-1:0]                resp_valid_i,
+  output logic         [NumIn-1:0]                resp_ready_o
+);
+
+  `include "common_cells/registers.svh"
+  localparam int unsigned NumGroupReq = ReqGF > 1 ? NumIn >> $clog2(ReqGF) : NumIn;
+  localparam int unsigned NumGroupRsp = RspGF > 1 ? NumIn >> $clog2(RspGF) : NumIn;
+
+  /*************/
+  /*  Request  */
+  /*************/
+
+  // Inputs of the burst cutter
+  logic [NumIn-1:0][DataWidth-1:0] req_cutter_wdata;
+  logic [NumInLog2-1:0]            req_cutter_ini_addr;
+  logic [AddrWidth-1:0]            req_cutter_tgt_addr;
+  logic                            req_cutter_wen;
+  logic [BeWidth-1:0]              req_cutter_be;
+  burst_t                          req_cutter_burst;
+  logic                            cutter_ready;
+
+  // Outputs of the burst cutter
+  logic [NumInLog2-1:0] req_bursted_ini_addr;
+  logic [AddrWidth-1:0] req_bursted_tgt_addr;
+  logic [DataWidth-1:0] req_bursted_wdata;
+  logic                 req_bursted_wen;
+  logic [BeWidth-1:0]   req_bursted_be;
+  burst_t               req_bursted_burst;
+  logic                 req_bursted_valid;
+
+  // To verify that the request goes to consecutive addresses
+  logic [NumIn-2:0] consecutive;
+  logic             consecutive_read, consecutive_write;
+
+  always_comb begin
+
+    // Bypass input
+    req_ini_addr_o = req_ini_addr_i;
+    req_tgt_addr_o = req_tgt_addr_i;
+    req_wdata_o    = req_wdata_i;
+    req_wen_o      = req_wen_i;
+    req_be_o       = req_be_i;
+    req_burst_o    = '0;
+    req_valid_o    = req_valid_i;
+    req_ready_o    = req_ready_i;
+
+    // Only written inside the ReqGF > 1 branch below; the default keeps the
+    // block latch-free when request grouping is disabled.
+    consecutive_write = 1'b0;
+
+    // Check if request goes to consecutive addresses
+    for (int i = 0; i < NumIn-1; i++) begin
+      consecutive[i] = (req_tgt_addr_i[i+1][AddrWidth-1:ByteOffWidth]
+                      - req_tgt_addr_i[i][AddrWidth-1:ByteOffWidth]) == AddrWidth'(1);
+    end
+
+    /* WRITE */
+
+    // Assign grouped requests
+    if (ReqGF > 1) begin
+      for (int i = 0; i < NumGroupReq; i++) begin
+        consecutive_write = &consecutive[i*ReqGF+:(ReqGF-1)] && &req_wen_i[i*ReqGF+:ReqGF];
+        if (&req_valid_i[i*ReqGF+:ReqGF] && consecutive_write) begin
+          req_ini_addr_o[i*ReqGF]      = req_ini_addr_i[i*ReqGF];
+          req_tgt_addr_o[i*ReqGF]      = req_tgt_addr_i[i*ReqGF];
+          req_wdata_o[i*ReqGF]         = req_wdata_i[i*ReqGF];
+          req_wen_o[i*ReqGF]           = req_wen_i[i*ReqGF];
+          req_be_o[i*ReqGF]            = req_be_i[i*ReqGF];
+          req_burst_o[i*ReqGF].isburst = 1'b1;
+          req_burst_o[i*ReqGF].blen    = '0;
+          req_valid_o[i*ReqGF]         = req_valid_i[i*ReqGF];
+          req_ready_o[i*ReqGF]         = req_valid_o[i*ReqGF] && req_ready_i[i*ReqGF];
+          for (int j = 1; j < ReqGF; j++) begin
+            req_ini_addr_o[i*ReqGF+j] = '0;
+            req_tgt_addr_o[i*ReqGF+j] = '0;
+            req_wdata_o[i*ReqGF+j]    = '0;
+            req_wen_o[i*ReqGF+j]      = 1'b0;
+            req_be_o[i*ReqGF+j]       = '0;
+            req_burst_o[i*ReqGF+j]    = '0;
+            req_valid_o[i*ReqGF+j]    = 1'b0;
+            req_ready_o[i*ReqGF+j]    = req_valid_o[i*ReqGF] && req_ready_i[i*ReqGF];
+            // Carry the data word of port i*ReqGF+j in its own gdata slot
+            // of the (i*ReqGF)'th output; the burst manager unpacks it
+            // from gdata[j-1]. Writing the whole gdata array here would
+            // overwrite the slots filled by earlier iterations.
+            req_burst_o[i*ReqGF].gdata[j-1] = req_wdata_i[i*ReqGF+j][GroupedDW-1:0];
+          end
+        end
+      end
+    end
+
+    /* READ */
+
+    // Assign input requests to cutter inputs. A burst always starts from
+    // port 0, so that port provides address, initiator and control fields.
+    req_cutter_ini_addr      = req_ini_addr_i[0];
+    req_cutter_tgt_addr      = req_tgt_addr_i[0];
+    req_cutter_wdata         = req_wdata_i;
+    req_cutter_wen           = req_wen_i[0];
+    req_cutter_be            = req_be_i[0];
+    req_cutter_burst.isburst = 1'b0;
+    req_cutter_burst.blen    = NumIn;
+    req_cutter_burst.gdata   = '0;
+
+    consecutive_read = &consecutive && (~|req_wen_i);
+
+    // Burst the read request
+    if (&req_valid_i && consecutive_read) begin
+      req_cutter_burst.isburst = 1'b1;
+      req_ini_addr_o[0]        = req_bursted_ini_addr;
+      req_tgt_addr_o[0]        = req_bursted_tgt_addr;
+      req_wdata_o[0]           = req_bursted_wdata;
+      req_wen_o[0]             = req_bursted_wen;
+      req_be_o[0]              = req_bursted_be;
+      req_burst_o[0]           = req_bursted_burst;
+      req_valid_o[0]           = req_bursted_valid;
+      req_ready_o[0]           = cutter_ready;
+      // Silence other ports; they are acknowledged together with port 0
+      for (int i = 1; i < NumIn; i++) begin
+        req_ini_addr_o[i] = '0;
+        req_tgt_addr_o[i] = '0;
+        req_wdata_o[i]    = '0;
+        req_wen_o[i]      = 1'b0;
+        req_be_o[i]       = '0;
+        req_burst_o[i]    = '0;
+        req_valid_o[i]    = 1'b0;
+        req_ready_o[i]    = cutter_ready;
+      end
+    end
+
+  end
+
+  burst_cutter #(
+    .NumIn        (NumIn        ),
+    .NumOut       (NumOut       ),
+    .AddrWidth    (AddrWidth    ),
+    .DataWidth    (DataWidth    ),
+    .BeWidth      (BeWidth      ),
+    .AddrMemWidth (AddrMemWidth ),
+    .ByteOffWidth (ByteOffWidth )
+  ) i_burst_cutter (
+    .clk_i          (clk_i                ),
+    .rst_ni         (rst_ni               ),
+    // Memory Request In
+    .req_ini_addr_i (req_cutter_ini_addr  ),
+    .req_tgt_addr_i (req_cutter_tgt_addr  ),
+    .req_wen_i      (req_cutter_wen       ),
+    .req_wdata_i    (req_cutter_wdata     ),
+    .req_be_i       (req_cutter_be        ),
+    .req_burst_i    (req_cutter_burst     ),
+    .req_valid_i    (req_valid_i[0]       ),
+    .req_ready_o    (cutter_ready         ),
+    // Memory Request Out
+    .req_ini_addr_o (req_bursted_ini_addr ),
+    .req_tgt_addr_o (req_bursted_tgt_addr ),
+    .req_wen_o      (req_bursted_wen      ),
+    .req_wdata_o    (req_bursted_wdata    ),
+    .req_be_o       (req_bursted_be       ),
+    .req_burst_o    (req_bursted_burst    ),
+    .req_valid_o    (req_bursted_valid    ),
+    .req_ready_i    (req_ready_i[0]       )
+  );
+
+  /*************/
+  /* Response  */
+  /*************/
+
+  if (RspGF == 1) begin: gen_default_assignment
+
+    // Default assignment
+    assign resp_ini_addr_o = resp_ini_addr_i;
+    assign resp_rdata_o    = resp_rdata_i;
+    assign resp_valid_o    = resp_valid_i;
+    assign resp_ready_o    = resp_ready_i;
+
+  end else begin: gen_grouped_resp_assignment
+
+    always_comb begin
+      // Default assignment
+      resp_ini_addr_o = resp_ini_addr_i;
+      resp_rdata_o    = resp_rdata_i;
+      resp_valid_o    = resp_valid_i;
+      resp_ready_o    = resp_ready_i;
+
+      for (int ii = 0; ii < NumGroupRsp; ii++) begin
+        if (resp_valid_i[ii*RspGF] && resp_burst_i[ii*RspGF].isburst) begin
+          // If the response is grouped only one every RspGF input will be
+          // valid. If any of the other inputs is valid give them priority.
+          // Otherwise assign to the other ports the response from the
+          // (ii*RspGF)'th port and signal them valid.
+          if (|resp_valid_o[(ii*RspGF+1)+:(RspGF-1)]) begin
+            resp_ini_addr_o[ii*RspGF] = '0;
+            resp_rdata_o[ii*RspGF]    = '0;
+            resp_valid_o[ii*RspGF]    = 1'b0;
+            resp_ready_o[ii*RspGF]    = 1'b0;
+          end else begin
+            // Assign values from port ii*RspGF
+            resp_ini_addr_o[ii*RspGF] = resp_ini_addr_i[ii*RspGF];
+            resp_rdata_o[ii*RspGF]    = resp_rdata_i[ii*RspGF];
+            resp_valid_o[ii*RspGF]    = resp_valid_i[ii*RspGF];
+            // Send ready back only when all the ports are ready
+            resp_ready_o[ii*RspGF]    = &resp_ready_i[ii*RspGF+:RspGF];
+            for (int jj = 1; jj < RspGF; jj++) begin
+              resp_ini_addr_o[ii*RspGF+jj] = resp_ini_addr_i[ii*RspGF] + jj;
+              // TODO: This is necessary to assign all the response fields by
+              // default to the value of the (ii*RspGF)'th port. It assumes
+              // that the actual data payload is in the LSBs.
+              resp_rdata_o[ii*RspGF+jj] = (DataWidth > GroupedDW) ? {resp_rdata_i[ii*RspGF][DataWidth-1:GroupedDW], resp_burst_i[ii*RspGF].gdata[jj-1]} :
+                                                                     resp_burst_i[ii*RspGF].gdata[jj-1];
+              resp_valid_o[ii*RspGF+jj] = resp_valid_i[ii*RspGF];
+              resp_ready_o[ii*RspGF+jj] = 1'b0;
+            end
+          end
+        end
+      end
+    end
+
+  end
+
+
+endmodule : burst_req_grouper
diff --git a/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv
new file mode 100644
index 0000000..c203625
--- /dev/null
+++ b/rtl/variable_latency_interconnect/burst_variable_latency_interconnect.sv
@@ -0,0 +1,136 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Michael Schaffner , ETH Zurich +// Matheus Cavalcante , ETH Zurich +// Marco Bertuletti , ETH Zurich + +// Date: 16.01.2020 + +// Description: Burst-aware wrapper around variable_latency_interconnect: the burst sideband is +// packed below the data word ({wdata, burst} / {rdata, burst}) and routed as one payload. +// Supported topologies: full crossbar and radix-2/4 butterflies. Only the full crossbar allows +// NumIn/NumOut configurations that are not aligned to a power of 2. + +module burst_variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( + // Global parameters + parameter int unsigned NumIn = 32, // Number of Initiators. Must be aligned with a power of 2 for butterflies. + parameter int unsigned NumOut = 64, // Number of Targets. Must be aligned with a power of 2 for butterflies. + parameter int unsigned AddrWidth = 32, // Address Width on the Initiator Side + parameter int unsigned DataWidth = 32, // Data Word Width + parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width + parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target + parameter int unsigned BurstWidth = 1, // Width of the burst sideband carried with each request + parameter int unsigned BurstRspWidth = 1, // Width of the burst sideband carried with each response + parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling + // Spill registers + // A bit set at position i indicates a spill register at the i-th crossbar layer. + // The layers are counted starting at 0 from the initiator, for the requests, and from the target, for the responses.
+ parameter logic [63:0] SpillRegisterReq = 64'h0, + parameter logic [63:0] SpillRegisterResp = 64'h0, + parameter bit FallThroughRegister = 1'b0, // Insert a fall-through register, if missing a spill register in that stage + // Determines the width of the byte offset in a memory word. Normally this can be left at the default value, + // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal). + parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, + // Topology can be: LIC, BFLY2, BFLY4, CLOS + parameter topo_e Topology = tcdm_interconnect_pkg::LIC, + // Dependent parameters. DO NOT CHANGE! + parameter int unsigned NumInLog2 = NumIn == 1 ? 1 : $clog2(NumIn) +) ( + input logic clk_i, + input logic rst_ni, + // Initiator side + input logic [NumIn-1:0] req_valid_i, // Request valid + output logic [NumIn-1:0] req_ready_o, // Request ready + input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address + input logic [NumIn-1:0] req_wen_i, // Write enable + input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data + input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable + input logic [NumIn-1:0][BurstWidth-1:0] req_burst_i, // Burst sideband, routed along with the write data + output logic [NumIn-1:0] resp_valid_o, // Response valid + input logic [NumIn-1:0] resp_ready_i, // Response ready + output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, // Data response + output logic [NumIn-1:0][BurstRspWidth-1:0] resp_burst_o, // Burst sideband, routed along with the read data + // Target side + output logic [NumOut-1:0] req_valid_o, // Request valid + input logic [NumOut-1:0] req_ready_i, // Request ready + output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address + output logic [NumOut-1:0][AddrMemWidth-1:0] req_tgt_addr_o, // Target address + output logic [NumOut-1:0] req_wen_o, // Write enable + output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, // Write data + output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable + output
logic [NumOut-1:0][BurstWidth-1:0] req_burst_o, // Burst data + input logic [NumOut-1:0] resp_valid_i, // Response valid + output logic [NumOut-1:0] resp_ready_o, // Response ready + input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address + input logic [NumOut-1:0][DataWidth-1:0] resp_rdata_i, // Data response + input logic [NumOut-1:0][BurstRspWidth-1:0] resp_burst_i // Burst response +); + + localparam int unsigned ReqAggDataWidth = DataWidth + BurstWidth; // Request payload: {wdata, burst} + localparam int unsigned RespAggDataWidth = DataWidth + BurstRspWidth; // Response payload: {rdata, burst} + + logic [NumIn-1:0][ReqAggDataWidth-1:0] req_agg_data_in; + logic [NumOut-1:0][ReqAggDataWidth-1:0] req_agg_data_out; + + logic [NumIn-1:0][RespAggDataWidth-1:0] resp_agg_data_out; + logic [NumOut-1:0][RespAggDataWidth-1:0] resp_agg_data_in; + + for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_inputs // Initiator side: pack requests, unpack responses + assign req_agg_data_in[j] = {req_wdata_i[j], req_burst_i[j]}; + assign {resp_rdata_o[j], resp_burst_o[j]} = resp_agg_data_out[j]; + end + + for (genvar k = 0; unsigned'(k) < NumOut; k++) begin : gen_outputs // Target side: unpack requests, pack responses + assign {req_wdata_o[k], req_burst_o[k]} = req_agg_data_out[k]; + assign resp_agg_data_in[k] = {resp_rdata_i[k], resp_burst_i[k]}; + end + + variable_latency_interconnect #( + .NumIn (NumIn ), + .NumOut (NumOut ), + .AddrWidth (AddrWidth ), + .ReqDataWidth (ReqAggDataWidth ), + .RespDataWidth (RespAggDataWidth ), + .BeWidth (BeWidth ), // Passed explicitly: by default derived from DataWidth, not from the aggregated width + .AddrMemWidth (AddrMemWidth ), + .AxiVldRdy (AxiVldRdy ), + .SpillRegisterReq (SpillRegisterReq ), + .SpillRegisterResp (SpillRegisterResp ), + .FallThroughRegister (FallThroughRegister ), + .ByteOffWidth (ByteOffWidth ), // Passed explicitly: by default derived from DataWidth, not from the aggregated width + .Topology (Topology ) + ) i_variable_latency_interconnect ( + .clk_i, + .rst_ni, + .req_valid_i (req_valid_i ), + .req_ready_o (req_ready_o ), + .req_tgt_addr_i (req_tgt_addr_i ), + .req_wen_i (req_wen_i ), + .req_wdata_i (req_agg_data_in ), + .req_be_i (req_be_i ), + .resp_valid_o (resp_valid_o ), + .resp_ready_i (resp_ready_i ), + .resp_rdata_o
(resp_agg_data_out ), + // Target side + .req_valid_o (req_valid_o ), + .req_ready_i (req_ready_i ), + .req_ini_addr_o (req_ini_addr_o ), + .req_tgt_addr_o (req_tgt_addr_o ), + .req_wen_o (req_wen_o ), + .req_wdata_o (req_agg_data_out ), + .req_be_o (req_be_o ), + .resp_valid_i (resp_valid_i ), + .resp_ready_o (resp_ready_o ), + .resp_ini_addr_i (resp_ini_addr_i ), + .resp_rdata_i (resp_agg_data_in ) + ); + +endmodule : burst_variable_latency_interconnect diff --git a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv index 8c6eaae..6139e1c 100644 --- a/rtl/variable_latency_interconnect/variable_latency_interconnect.sv +++ b/rtl/variable_latency_interconnect/variable_latency_interconnect.sv @@ -10,6 +10,7 @@ // Author: Michael Schaffner , ETH Zurich // Matheus Cavalcante , ETH Zurich +// Marco Bertuletti , ETH Zurich // Date: 16.01.2020 @@ -23,8 +24,10 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( parameter int unsigned NumIn = 32, // Number of Initiators. Must be aligned with a power of 2 for butterflies. parameter int unsigned NumOut = 64, // Number of Targets. Must be aligned with a power of 2 for butterflies.
parameter int unsigned AddrWidth = 32, // Address Width on the Initiator Side - parameter int unsigned DataWidth = 32, // Data Word Width - parameter int unsigned BeWidth = DataWidth/8, // Byte Strobe Width + parameter int unsigned DataWidth = 32, // Data Word Width (default for the Request and Response paths) + parameter int unsigned ReqDataWidth = DataWidth, // Data Word Width on the Request path + parameter int unsigned RespDataWidth = DataWidth, // Data Word Width on the Response path + parameter int unsigned BeWidth = ReqDataWidth/8, // Byte Strobe Width parameter int unsigned AddrMemWidth = 12, // Number of Address bits per Target parameter bit AxiVldRdy = 1'b1, // Valid/ready signaling // Spill registers @@ -35,7 +38,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( parameter bit FallThroughRegister = 1'b0, // Insert a fall-through register, if missing a spill register in that stage // Determines the width of the byte offset in a memory word. Normally this can be left at the default value, // but sometimes it needs to be overridden (e.g., when metadata is supplied to the memory via the wdata signal). - parameter int unsigned ByteOffWidth = $clog2(DataWidth-1)-3, + parameter int unsigned ByteOffWidth = $clog2(ReqDataWidth-1)-3, // Topology can be: LIC, BFLY2, BFLY4, CLOS parameter topo_e Topology = tcdm_interconnect_pkg::LIC, // Dependant parameters. DO NOT CHANGE!
@@ -44,27 +47,27 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( input logic clk_i, input logic rst_ni, // Initiator side - input logic [NumIn-1:0] req_valid_i, // Request valid - output logic [NumIn-1:0] req_ready_o, // Request ready - input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address - input logic [NumIn-1:0] req_wen_i, // Write enable - input logic [NumIn-1:0][DataWidth-1:0] req_wdata_i, // Write data - input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable - output logic [NumIn-1:0] resp_valid_o, // Response valid - input logic [NumIn-1:0] resp_ready_i, // Response ready - output logic [NumIn-1:0][DataWidth-1:0] resp_rdata_o, // Data response + input logic [NumIn-1:0] req_valid_i, // Request valid + output logic [NumIn-1:0] req_ready_o, // Request ready + input logic [NumIn-1:0][AddrWidth-1:0] req_tgt_addr_i, // Target address + input logic [NumIn-1:0] req_wen_i, // Write enable + input logic [NumIn-1:0][ReqDataWidth-1:0] req_wdata_i, // Write data (ReqDataWidth bits per initiator) + input logic [NumIn-1:0][BeWidth-1:0] req_be_i, // Byte enable + output logic [NumIn-1:0] resp_valid_o, // Response valid + input logic [NumIn-1:0] resp_ready_i, // Response ready + output logic [NumIn-1:0][RespDataWidth-1:0] resp_rdata_o, // Data response // Target side - output logic [NumOut-1:0] req_valid_o, // Request valid - input logic [NumOut-1:0] req_ready_i, // Request ready - output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address - output logic [NumOut-1:0][AddrMemWidth-1:0] req_tgt_addr_o, // Target address - output logic [NumOut-1:0] req_wen_o, // Write enable - output logic [NumOut-1:0][DataWidth-1:0] req_wdata_o, // Write data - output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable - input logic [NumOut-1:0] resp_valid_i, // Response valid - output logic [NumOut-1:0] resp_ready_o, // Response ready - input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address - input logic
[NumOut-1:0][DataWidth-1:0] resp_rdata_i // Data response + output logic [NumOut-1:0] req_valid_o, // Request valid + input logic [NumOut-1:0] req_ready_i, // Request ready + output logic [NumOut-1:0][NumInLog2-1:0] req_ini_addr_o, // Initiator address + output logic [NumOut-1:0][AddrMemWidth-1:0] req_tgt_addr_o, // Target address + output logic [NumOut-1:0] req_wen_o, // Write enable + output logic [NumOut-1:0][ReqDataWidth-1:0] req_wdata_o, // Write data + output logic [NumOut-1:0][BeWidth-1:0] req_be_o, // Byte enable + input logic [NumOut-1:0] resp_valid_i, // Response valid + output logic [NumOut-1:0] resp_ready_o, // Response ready + input logic [NumOut-1:0][NumInLog2-1:0] resp_ini_addr_i, // Initiator address + input logic [NumOut-1:0][RespDataWidth-1:0] resp_rdata_i // Data response ); /****************** @@ -74,35 +77,40 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( // localparams and aggregation of address, wen and payload data localparam int unsigned NumOutLog2 = $clog2(NumOut); - localparam int unsigned IniAggDataWidth = 1 + BeWidth + AddrMemWidth + DataWidth; + localparam int unsigned ReqAggDataWidth = 1 + BeWidth + AddrMemWidth + ReqDataWidth; // Routed request word: {wen, be, tgt_addr, wdata} + localparam int unsigned RespAggDataWidth = RespDataWidth ; /************* * Signals * *************/ - logic [NumIn-1:0][IniAggDataWidth-1:0] data_agg_in; - logic [NumOut-1:0][IniAggDataWidth-1:0] data_agg_out; + logic [NumIn-1:0][ReqAggDataWidth-1:0] req_agg_in; + logic [NumOut-1:0][ReqAggDataWidth-1:0] req_agg_out; + + logic [NumIn-1:0][RespAggDataWidth-1:0] resp_agg_out; // Routed response word delivered to the initiators + logic [NumOut-1:0][RespAggDataWidth-1:0] resp_agg_in; // Routed response word supplied by the targets + logic [NumIn-1:0][cf_math_pkg::idx_width(NumOut)-1:0] tgt_sel; for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_inputs - // Extract target index - if (NumIn == 1) begin - assign tgt_sel[j] = '0; - end else begin - if (NumOut == 1) begin - assign tgt_sel[j] = 0; - end else begin - assign tgt_sel[j] = req_tgt_addr_i[j][ByteOffWidth +:
NumOutLog2]; - end - end - // Aggregate data to be routed to targets - assign data_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j]}; + assign req_agg_in[j] = {req_wen_i[j], req_be_i[j], req_tgt_addr_i[j][ByteOffWidth + NumOutLog2 +: AddrMemWidth], req_wdata_i[j]}; + assign resp_rdata_o[j] = resp_agg_out[j]; end - // Disaggregate data for (genvar k = 0; unsigned'(k) < NumOut; k++) begin : gen_outputs - assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k]} = data_agg_out[k]; + // Disaggregate data + assign {req_wen_o[k], req_be_o[k], req_tgt_addr_o[k], req_wdata_o[k]} = req_agg_out[k]; + assign resp_agg_in[k] = resp_rdata_i[k]; // Response word is forwarded unmodified + end + + for (genvar j = 0; unsigned'(j) < NumIn; j++) begin : gen_target + // Extract target index (the NumOutLog2 address bits just above the byte offset) + if (NumOut == 1) begin + assign tgt_sel[j] = 0; + end else begin + assign tgt_sel[j] = req_tgt_addr_i[j][ByteOffWidth +: NumOutLog2]; + end end /**************** @@ -114,17 +122,17 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( assign req_valid_o = req_valid_i; assign req_ready_o = req_ready_i; assign req_ini_addr_o = '0; - assign data_agg_out = data_agg_in; + assign req_agg_out = req_agg_in; assign resp_valid_o = resp_valid_i; assign resp_ready_o = resp_ready_i; - assign resp_rdata_o = resp_rdata_i; + assign resp_agg_out = resp_agg_in; // Tuned logarithmic interconnect architecture, based on rr_arb_tree primitives end else if (Topology == tcdm_interconnect_pkg::LIC) begin : gen_lic full_duplex_xbar #( .NumIn (NumIn ), .NumOut (NumOut ), - .ReqDataWidth (IniAggDataWidth ), - .RespDataWidth (DataWidth ), + .ReqDataWidth (ReqAggDataWidth ), + .RespDataWidth (RespAggDataWidth ), .AxiVldRdy (AxiVldRdy ), .SpillRegisterReq (SpillRegisterReq[0] ), .SpillRegisterResp (SpillRegisterResp[0]), @@ -139,19 +147,19 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( .req_valid_i (req_valid_i ), .req_ready_o
(req_ready_o ), .req_tgt_addr_i (tgt_sel ), - .req_wdata_i (data_agg_in ), + .req_wdata_i (req_agg_in ), .resp_valid_o (resp_valid_o ), - .resp_rdata_o (resp_rdata_o ), + .resp_rdata_o (resp_agg_out ), .resp_ready_i (resp_ready_i ), // Target side .req_valid_o (req_valid_o ), .req_ini_addr_o (req_ini_addr_o ), .req_ready_i (req_ready_i ), - .req_wdata_o (data_agg_out ), + .req_wdata_o (req_agg_out ), .resp_valid_i (resp_valid_i ), .resp_ready_o (resp_ready_o ), .resp_ini_addr_i(resp_ini_addr_i), - .resp_rdata_i (resp_rdata_i ) + .resp_rdata_i (resp_agg_in ) ); end @@ -200,7 +208,7 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( variable_latency_bfly_net #( .NumIn (NumIn ), .NumOut (NumOut ), - .DataWidth (IniAggDataWidth ), + .DataWidth (ReqAggDataWidth ), .Radix (Radix ), .ExtPrio (1'b0 ), .SpillRegister (SpillRegisterReq ), @@ -215,18 +223,18 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( .valid_i (req_valid_i ), .ready_o (req_ready_o ), .tgt_addr_i(tgt_sel ), - .wdata_i (data_agg_in ), + .wdata_i (req_agg_in ), // Target side .valid_o (req_valid_o ), .ini_addr_o(req_ini_addr_o ), .ready_i (req_ready_i ), - .wdata_o (data_agg_out ) + .wdata_o (req_agg_out ) ); variable_latency_bfly_net #( .NumIn (NumOut ), .NumOut (NumIn ), - .DataWidth (DataWidth ), + .DataWidth (RespAggDataWidth ), .Radix (Radix ), .ExtPrio (1'b0 ), .SpillRegister (SpillRegisterResp ), @@ -241,12 +249,12 @@ module variable_latency_interconnect import tcdm_interconnect_pkg::topo_e; #( .valid_i (resp_valid_i ), .ready_o (resp_ready_o ), .tgt_addr_i(resp_ini_addr_i), - .wdata_i (resp_rdata_i ), + .wdata_i (resp_agg_in ), /* response word from the targets; must match the declared resp_agg_in */ // Initiator side .valid_o (resp_valid_o ), .ready_i (resp_ready_i ), .ini_addr_o(/* Unused */ ), - .wdata_o (resp_rdata_o ) + .wdata_o (resp_agg_out ) /* response word to the initiators; must match the declared resp_agg_out */ ); end