diff --git a/.github/workflows/scala-unit-test.yml b/.github/workflows/scala-unit-test.yml index ba1d27022..0b334ef54 100644 --- a/.github/workflows/scala-unit-test.yml +++ b/.github/workflows/scala-unit-test.yml @@ -5,7 +5,7 @@ # Xiaoling Yi # Run Scala Unit Test -name: Unit Test +name: Run Scala Unit Test on: push: branches: ["main"] @@ -20,6 +20,6 @@ jobs: steps: - uses: actions/checkout@v2 - name: Run the unit tests - working-directory: util/chiselgen + working-directory: hw/chisel run: | sbt test diff --git a/hw/chisel/.gitignore b/hw/chisel/.gitignore new file mode 100644 index 000000000..e4f3f71dd --- /dev/null +++ b/hw/chisel/.gitignore @@ -0,0 +1,7 @@ +generated +project/* +!project/plugins.sbt +!project/build.properties +target +test_run_dir +.bsp diff --git a/hw/chisel/README.md b/hw/chisel/README.md new file mode 100644 index 000000000..44d4000f9 --- /dev/null +++ b/hw/chisel/README.md @@ -0,0 +1,8 @@ +# SNAX Framework Chisel components +This directory contains SNAX framework components developed in Chisel. You can find documentation for these modules here: + +### Streamer +[documentation](doc/streamer.md) + +### CSR Manager +TODO diff --git a/hw/chisel/doc/streamer.md b/hw/chisel/doc/streamer.md new file mode 100644 index 000000000..f372ee6f6 --- /dev/null +++ b/hw/chisel/doc/streamer.md @@ -0,0 +1,230 @@ +# Flexible Streamer Generator for SNAX + +Every accelerator has a specific layout for data that comes in and goes out, but the data layout in memory may not be in the same format. This causes data layout management overhead on both the accelerator side and the compiler side. The Flexible Streamer Generator is a tool to generate _Streamers_ for an accelerator. A Streamer is a reusable hardware block designed to handle the memory accesses, decoupling the accelerator from the actual memory system, and simplifying the interface to the accelerator. The streamer's accelerator and data interfaces follow the valid-ready handshake. 
On top of this, the pre-fetch mechanism inside the Streamer also helps to relieve data contention in the memory, maximizing an accelerator's data-compute bandwidth. + +The Streamer has a compatible interface with the [SNAX core](https://github.com/KULeuven-micas/snitch_cluster) and can be integrated into it as a data streaming engine. + +It is written in CHISEL 5.0.0 and is intended to be connected to the SNAX accelerator RISC-V manager core through a SystemVerilog wrapper. + +Thanks to the strong expression power of Chisel, the Flexible Streamer Generator has the capability of dealing with any number of temporal loops and any number of parallel loops for data address generation. More importantly, the Streamer generator is accelerator agnostic, highly flexible, and parameterizable which means it is suitable for a wide range of accelerators employing a [tiled-strided-layout](https://github.com/KULeuven-MICAS/snax-mlir/tree/main/compiler/ir/tsl). We have generated Streamers for three accelerators, including the SNAX-GEMM, SNAX-PostProcessing-SIMD, and MAC Engine. + +## Microarchitecture +The microarchitecture of the Flexible Streamer Generator is shown below. + +![image](https://github.com/KULeuven-MICAS/snitch_cluster/assets/47864363/b499c7be-f897-462d-a662-1a0dd18d9c3f) + +The main module of the Streamer is the data mover, including the data reader (reads data from the real memory system and acts as the producer for the accelerator) and data writer (writes data to the real memory system and acts as the consumer for the accelerator). Each data mover has its own address generation unit and works independently so that each data mover can do the data transfer as quickly as possible. + +The data reader gives read requests to the memory system (the address, and read signal, etc.) and gets the response. When all the responses of one transaction are obtained, it pushes the valid data into a FIFO for the accelerator to consume. 
If the accelerator consumes the data successfully, the FIFO will pop the data. The data reader will keep getting new data from the real memory system (pre-fetch) until the FIFO is full or all the input data has been fetched. + +The data writer obtains valid data from the FIFO output (written by the accelerator) and sends write requests to the memory system. When there is a valid output from the accelerator (and the FIFO is not full), the valid output will be pushed to the FIFO. When the FIFO is not empty, the data writer will keep sending write requests. When one write transaction is successful, the data will be popped from the FIFO. + +The StreamerTop module has a CsrManager to manage the CSR read/write operations from the SNAX core. The Streamer has its own CSRs to store the configuration for the address generation and transaction number etc. The configuration, such as the temporal loop bound and strides, is written in the CSRs via a CsrManager when all the CSR configurations are valid. When doing the current transaction, the configuration for the next transaction operation can already be written into the CsrManager. When the current transaction finishes, the SNAX core can send the configuration valid signal, and then the CSR values in the CsrManager will be loaded into the Streamer. + +## Parameters +`Parameters.scala` contains the list of all the parameters for the Streamer. Each hardware module has its parameter class. The table below lists the descriptions of all the parameter classes and the parameter formats for the Streamer. + +|Class| Parameters | Description | +| - | - | - | +|CommonParams | addrWidth | The bit width of the memory system address. | +| | tcdmDataWidth | Data width for each TCDM* port. | +|TemporalAddrGenUnitParams | loopDim | The dimension of the temporal loops = the number of for loops. | +| | loopBoundWidth | The bit width of the loop bounds. | +| | addrWidth | The bit width of the memory system address. 
| +|SpatialAddrGenUnitParams | loopDim | The number of nested spatial(parfor) loops. | +| | loopBounds | The bounds of each spatial(parfor) loop dimension. | +| | addrWidth | The bit width of the memory system address. | +| DataMoverParams | tcdmPortsNum | The number of TCDM ports connected to each data mover. | +| | spatialBounds | Spatial unrolling factors (your parfor) for each data mover. | +| | spatialDim | The dimension of spatial unrolling factors (your parfor) for each data mover. | +| | elementWidth | Single data element width for each data mover, useful for generating spatial addresses. | +| | fifoWidth |FIFO width. | +| StreamerParams | temporalAddrGenUnitParams | The parameters for the temporal address generation unit. Even though there is an independent temporal address generation unit for each data mover, all data movers actually share the same temporal address generation unit parameters. | +| | stationarity | The parameters for stationarity for each data mover. If the stationarity bit is set, the innermost loop for that data mover is set to 1.| +| | dataReaderParams | A sequence of the parameters for dataReader. Each dataReader parameter can be different. The number of the parameters can also be any, meaning there is no constraint on how many dataReaders there should be. | +| | dataWriterParams | Similar to dataReaderParams. A sequence of the parameters for dataWriter. Each dataWriter parameter can be different. The number of the parameters can also be any, meaning there is no constraint on how many dataWriters there should be. | +| | fifoReaderParams | A sequence of the parameters for the FIFOs for dataReaders. The sequence length should match the length of dataReaderParams.| +| | fifoWriterParams | A sequence of the parameters for the FIFOs for dataWriters. The sequence length should match the length of dataWriterParams.| + +*TCDM is the memory that the Streamer interacts with. 
+ +### Instantiate Streamers + +A custom streamer can be generated by creating specific parameters according to the specification in `streamer/Parameters.scala`, and can then be used directly in Chisel. If you are using the SNAX flow instead, the parameters defined in `cfg/configuration.hjson` are used to generate these parameters automatically, and thus need not be defined manually. + +#### Example: Streamer parameter configuration for a simple MAC Engine +```scala +object MacStreamerParameters extends CommonParams { + + def MacScalingFactor = 4 + + def temporalAddrGenUnitParams: TemporalAddrGenUnitParams = + TemporalAddrGenUnitParams( + loopDim = 1, + loopBoundWidth = 8, + addrWidth + ) + + def fifoReaderParams: Seq[FIFOParams] = Seq( + FIFOParams(64 * MacScalingFactor, 2), + FIFOParams(64 * MacScalingFactor, 2), + FIFOParams(64, 2) + ) + + def fifoWriterParams: Seq[FIFOParams] = Seq( + FIFOParams(64 * MacScalingFactor, 2) + ) + + def stationarity = Seq(0, 0, 1, 1) + + def dataReaderParams: Seq[DataMoverParams] = Seq( + DataMoverParams( + tcdmPortsNum = 1 * MacScalingFactor, + spatialBounds = Seq(2 * MacScalingFactor), + spatialDim = 1, + elementWidth = 32, + fifoWidth = fifoReaderParams(0).width + ), + DataMoverParams( + tcdmPortsNum = 1 * MacScalingFactor, + spatialBounds = Seq(2 * MacScalingFactor), + spatialDim = 1, + elementWidth = 32, + fifoWidth = fifoReaderParams(1).width + ), + DataMoverParams( + tcdmPortsNum = 1, + spatialBounds = Seq(2), + spatialDim = 1, + elementWidth = 32, + fifoWidth = fifoReaderParams(2).width + ) + ) + + def dataWriterParams: Seq[DataMoverParams] = Seq( + DataMoverParams( + tcdmPortsNum = 1 * MacScalingFactor, + spatialBounds = Seq(2 * MacScalingFactor), + spatialDim = 1, + elementWidth = 32, + fifoWidth = fifoWriterParams(0).width + ) + ) + +} +``` +## Address generation description + +The access pattern for the Streamer is represented by a set of nested for loops. 
Every for loop can specify a _stride_ which we use to increment a base pointer. On top of this, some of the for loops can be _spatially unrolled_, to enable parallel data accesses by the accelerator. This splits address generation in two parts, the temporal address generation and spatial address generation. The temporal address is based on the temporal loop counters and the temporal strides configuration. The spatial address is based on the spatial unrolling factor of the accelerator and the spatial strides configuration. The spatial address generation unit will generate a unique address for each data element which is accessed in parallel. As there may be multiple data elements per memory word, these addresses are then merged for proper data alignment with the memory bank width. + +We formulate the address generation formula below: +``` +for(;;temporalbound_n-1) + for(;;temporalbound_n-2) + … + for(;;temporalbound_0) + parfor(;;spatialbound_m-1) + parfor(;;spatialbound_m-2) + … + parfor(;;spatialbound_0) + temporal_address = (temporal_unrolling_loop_counters.zip(tempStride).map { case (a, b) => a * b }).reduce(_ +& _) + spatial_address = (spatial_unrolling_loop_counters.zip(spatialStride).map { case (a, b) => a * b }).reduce(_ +& _) + add_o = base_ptr + temporal_address + spatial_address +``` + +Take a SIMD accelerator as an example (1 temporal loop and 1 spatial loop), Vu = 8, the address generation is: +``` +for (ti = 0 to VEC_LEN/Vu – 1): + parfor (si = 0 to Vu -1): + addr_o[Vu-1:0] = base_ptr + ti*(tempStride) +  [Vu-1:0]*(spatialStride) +``` +Which actually translates to: +``` +addr_o[0] = base_ptr + ti*(tempStride) +  [0]*(spatialStride) +addr_o[1] = base_ptr + ti*(tempStride) +  [1]*(spatialStride) +addr_o[2] = base_ptr + ti*(tempStride) +  [2]*(spatialStride) +addr_o[3] = base_ptr + ti*(tempStride) +  [3]*(spatialStride) +addr_o[4] = base_ptr + ti*(tempStride) +  [4]*(spatialStride) +addr_o[5] = base_ptr + ti*(tempStride) +  [5]*(spatialStride) +addr_o[6] = 
base_ptr + ti*(tempStride) +  [6]*(spatialStride) +addr_o[7] = base_ptr + ti*(tempStride) +  [7]*(spatialStride) +``` +Take GEMM Accelerator input data A as an example(3 temporal loop and 2 spatial loop), the address generation is: + +``` +for (m1 = 0 to M’/Mu-1) + for (n1 = 0 to N’/Nu-1) + for (k1 = 0 to K’/Ku-1) + parfor(0 to Mu) + parfor(0 to Ku) + temporal_address = m1*t_str_m1 + n1*t_str_n1 + k1*t_str_k1 + spatial_address = [Mu-1:0]*s_str_m + [Ku-1:0]*str_str_k + addr_o[Mu-1:0][Ku-1:0] = ptr_o + temporal_address + spatial_address + +``` +where `t_str_*` - are temporal strides and `s_str_*` - are spatial strides. + +Which translates to: +``` +addr_o[0][0] = ptr_o + m1*t_str_m1 + n1*t_str_n1 + k1*t_str_k1 + [0]*s_str_m + [0]*str_str_k +addr_o[0][1] = ptr_o + m1*t_str_m1 + n1*t_str_n1 + k1*t_str_k1 + [0]*s_str_m + [1]*str_str_k +addr_o[0][2] = ptr_o + m1*t_str_m1 + n1*t_str_n1 + k1*t_str_k1 + [0]*s_str_m + [2]*str_str_k +… +addr_o[m][k] = ptr_o + m1*t_str_m1 + n1*t_str_n1 + k1*t_str_k1 + [m]*s_str_m + [k]*str_str_k +… +addr_o[Mu-1][Ku-3] = ptr_o + m1*t_str_m1 + n1*t_str_n1 + k1*t_str_k1 + [Mu-1]*s_str_m + [Ku-3]*str_str_k +addr_o[Mu-1][Ku-2] = ptr_o + m1*t_str_m1 + n1*t_str_n1 + k1*t_str_k1 + [Mu-1]*s_str_m + [Ku-2]*str_str_k +addr_o[Mu-1][Ku-1] = ptr_o + m1*t_str_m1 + n1*t_str_n1 + k1*t_str_k1 + [Mu-1]*s_str_m + [Ku-1]*str_str_k +``` + + +## IO ports +The input and output ports of the Streamer are shown in the table below. + +The Streamer uses a simplified CSR request/response interface for CSR write/read operation. A more detailed description of the CSR operation interface can be found [here](https://kuleuven-micas.github.io/snitch_cluster/rm/snax_cluster.html). + +The Streamer uses a simplified TCDM request/response interface to read and write data from/to the TCDM. A more detailed description of the TCDM request/response interface can be found [here](https://kuleuven-micas.github.io/snitch_cluster/rm/snax_cluster.html). 
+ +The Streamer uses the Decoupled interface for accelerator input and output data. A more detailed description of the Decoupled interface can be found [here](https://www.chisel-lang.org/docs/explanations/interfaces-and-connections#the-standard-ready-valid-interface-readyvalidio--decoupled). + +The simplified interface for the CSR and TCDM request/response contains the core signals and can be found at `TypeDefine.scala`. The complete signals for the CSR request/response interface and TCDM request/response interface will be added in the SystemVerilog wrapper. + +|Signal bundle| Signals | Signal name in generated SV | Width | Dir | Description | +| - | - | - | - | - | - | +| csr.req | data | io_csr_req_bits_data | 32| In| The write data from CSR request. | +| | addr | io_csr_req_bits_addr | 32| In| The address indicates which CSR is to be written or read. | +| | write | io_csr_req_bits_write | 1| In| The signal indicates this request is for CSR write or read. | +| | valid | io_csr_req_valid | 1 | In| The signal indicates if this request is valid. | +| | ready | io_csr_req_ready | 1 | Out| The signal indicates if the accelerator is ready for this CSR operation.| +| csr.rsp | data | io_csr_rsp_bits_data | 32| Out| The response data for CSR read operation. | +| | valid | io_csr_rsp_valid | 1 | Out| The signal indicates if this response is valid. | +| | ready | io_csr_rsp_ready | 1 | In| The signal indicates if the SNAX core is ready for this CSR response. | +| tcdm_req | data | io_data_tcdm_req_0_bits_data | 64| Out| The data from TCDM request. This data is only valuable when it is a write request. | +| tcdm_req | addr | io_data_tcdm_req_0_bits_addr | 32| Out| The address from TCDM request. | +| tcdm_req | write | io_data_tcdm_req_0_bits_write | 1| Out| The signal indicates this request is for TCDM write or read. | +| tcdm_req | valid | io_data_tcdm_req_0_valid | 1| Out| The signal indicates if this request is valid. 
| +| tcdm_req | ready | io_data_tcdm_req_0_ready | 1| In| The signal indicates if the TCDM is ready for this TCDM request. | +| | . | . | . | . | There can be a large number of tcdm_req ports depending on the spatial unrolling factors for the data readers and the data writers. tcdm_req ports for readers have lower index number. A detailed mapping for the tcdm_req ports and the data mover ports can be found at `Streamer.scala`.| +| tcdm_rsp | data | io_data_tcdm_rsp_0_bits_data | 64| In| The response data from the read request. | +| tcdm_rsp | valid | io_data_tcdm_rsp_0_valid | 1| In| The signal indicates if this response is valid. | +| | . | . | . | . | The tcdm_rsp ports number is the same as tcdm_req. | +| streamer2accelerator | data | io_data_streamer2accelerator_data_0_bits | First read FIFO width| Out | The data for the acceleratorX input. | +| streamer2accelerator | valid | io_data_streamer2accelerator_data_0_valid | 1| Out| The signal indicates if this data is valid. | +| streamer2accelerator | ready | io_data_streamer2accelerator_data_0_ready | 1| In| The signal indicates if the acceleratorX is ready for this data (has used this data already). | +| | . | . | . | . | The streamer2accelerator ports number is the same as data reader number. The index 0 corresponds to the first input data, 1 to the second input data, and so on. | +| accelerator2streamer | data | io_data_accelerator2streamer_data_0_bits | First write FIFO width| In| The data from the acceleratorX output. | +| accelerator2streamer | valid | io_data_accelerator2streamer_data_0_valid | 1| In| The signal indicates if this data is valid. | +| accelerator2streamer | ready | io_data_accelerator2streamer_data_0_ready | 1| Out| The signal indicates if the data writer is ready for taking in this data (not full). | +| | . | . | . | . | The accelerator2streamer ports number is the same as data writer number. 
The index 0 corresponds to the first output data, 1 to the second output data (if any), and so on.| +| | | | | | | | + +### CSR definition +The offset below is defined by the SNAX core. A more detailed explanation of what these configurations are can be found at `StreamerTop.scala`. + +| Address | CSR name | Notes | +|---------|--------------------------|-------------------------------------| +| offset + [0..temporalDim - 1] | temporalLoopBoundCSRs | temporal loop bound for each temporal dimension. | +| offset + temporalDim + [0..dataMoverNum * temporalDim - 1] | temporalLoopSrtidesCSRs | temporal loop strides for each temporal dimension and for each data mover. | +| offset + temporalDim + dataMoverNum * temporalDim + [0..spatialDim.sum - 1] | spatialLoopSrtidesCSRs | spatial loop strides for each data mover and for corresponding spatial dimension. The spatial dimension for each data mover can be different. It depends on the accelerator. | +| offset + temporalDim + dataMoverNum * temporalDim + spatialDim.sum + [0..dataMoverNum - 1] | basePtrCSRs | base pointers for each data mover. 
| +| offset + temporalDim + dataMoverNum * temporalDim + spatialDim.sum + dataMoverNum + 1| statusCSR | Performance counter for the busy state of the streamer module diff --git a/hw/chisel/src/main/scala/snax/csr_manager/CsrManager.scala b/hw/chisel/src/main/scala/snax/csr_manager/CsrManager.scala index d56052f95..826286158 100644 --- a/hw/chisel/src/main/scala/snax/csr_manager/CsrManager.scala +++ b/hw/chisel/src/main/scala/snax/csr_manager/CsrManager.scala @@ -115,15 +115,3 @@ class CsrManager( io.csr_config_out.bits <> csr } - -// Scala main function for generating CsrManager system verilog file -object CsrManager extends App { - emitVerilog( - new CsrManager( - csrManagerTestParameters.csrNum, - csrManagerTestParameters.csrAddrWidth, - csrManagerTestParameters.csrModuleTagName - ), - Array("--target-dir", "generated/csr_manager") - ) -} diff --git a/hw/chisel/src/main/scala/snax/streamer/DataMover.scala b/hw/chisel/src/main/scala/snax/streamer/DataMover.scala index fcdee2ebd..04082a7f0 100644 --- a/hw/chisel/src/main/scala/snax/streamer/DataMover.scala +++ b/hw/chisel/src/main/scala/snax/streamer/DataMover.scala @@ -15,7 +15,7 @@ import chisel3.util._ * @param params * The parameter class contains all the parameters of a data mover module */ -class DataMoverIO(params: DataMoverParams = DataMoverParams()) extends Bundle { +class DataMoverIO(params: DataMoverParams) extends Bundle { // signals for write request address generation val ptr_agu_i = Flipped(Decoupled(UInt(params.addrWidth.W))) @@ -46,7 +46,7 @@ class DataMoverIO(params: DataMoverParams = DataMoverParams()) extends Bundle { * The parameter class contains all the parameters of a data mover module */ class DataMover( - params: DataMoverParams = DataMoverParams(), + params: DataMoverParams, tagName: String = "" ) extends Module with RequireAsyncReset { @@ -222,7 +222,7 @@ class DataMover( // classes which extend the DataMover module, but are just // set to 0 here for testing purposes. 
class DataMoverTester( - params: DataMoverParams = DataMoverParams() + params: DataMoverParams ) extends DataMover(params) { for (i <- 0 until params.tcdmPortsNum) { diff --git a/hw/chisel/src/main/scala/snax/streamer/DataReader.scala b/hw/chisel/src/main/scala/snax/streamer/DataReader.scala index f09a13219..4e7751ca4 100644 --- a/hw/chisel/src/main/scala/snax/streamer/DataReader.scala +++ b/hw/chisel/src/main/scala/snax/streamer/DataReader.scala @@ -12,7 +12,7 @@ import chisel3.util._ * The parameter class contains all the parameters of a data mover module */ class DataReaderIO( - params: DataMoverParams = DataMoverParams() + params: DataMoverParams ) extends DataMoverIO(params) { // tcdm response @@ -40,7 +40,7 @@ class DataReaderIO( * The parameter class contains all the parameters of a data mover module */ class DataReader( - params: DataMoverParams = DataMoverParams(), + params: DataMoverParams, tagName: String = "" ) extends DataMover(params, tagName) { override val desiredName = tagName + "DataReader" @@ -135,11 +135,3 @@ class DataReader( } } - -// Scala main function for generating system verilog file for the DataReader module -object DataReader extends App { - emitVerilog( - new DataReader(DataMoverParams()), - Array("--target-dir", "generated/streamer") - ) -} diff --git a/hw/chisel/src/main/scala/snax/streamer/DataWriter.scala b/hw/chisel/src/main/scala/snax/streamer/DataWriter.scala index a4d50e3ee..ea4a91f31 100644 --- a/hw/chisel/src/main/scala/snax/streamer/DataWriter.scala +++ b/hw/chisel/src/main/scala/snax/streamer/DataWriter.scala @@ -11,7 +11,7 @@ import chisel3.util._ * The parameter class contains all the parameters of a data mover module */ class DataWriterIO( - params: DataMoverParams = DataMoverParams() + params: DataMoverParams ) extends DataMoverIO(params) { // valid data from the queue @@ -32,7 +32,7 @@ class DataWriterIO( * The parameter class contains all the parameters of a data mover module */ class DataWriter( - params: 
DataMoverParams = DataMoverParams(), + params: DataMoverParams, tagName: String = "" ) extends DataMover(params) { override val desiredName = tagName + "DataWriter" @@ -62,11 +62,3 @@ class DataWriter( io.data_fifo_i.ready := io.ptr_agu_i.ready } - -// Scala main function for generating system verilog file for the DataWriter module -object DataWriter extends App { - emitVerilog( - new DataWriter(DataMoverParams()), - Array("--target-dir", "generated/streamer") - ) -} diff --git a/hw/chisel/src/main/scala/snax/streamer/FIFO.scala b/hw/chisel/src/main/scala/snax/streamer/FIFO.scala index df68a68eb..15bd0ed0a 100644 --- a/hw/chisel/src/main/scala/snax/streamer/FIFO.scala +++ b/hw/chisel/src/main/scala/snax/streamer/FIFO.scala @@ -5,15 +5,15 @@ import chisel3.util._ // Customized FIFO with an extra almost_full signal. // almost_full will be asserted when there is Depth-1 elements in the FIFO -class FIFOIO(width: Int = FIFOTestParameters.fifoWidth) extends Bundle { +class FIFOIO(width: Int) extends Bundle { val in = Flipped(Decoupled(UInt(width.W))) val out = Decoupled(UInt(width.W)) val almost_full = Output(Bool()) } class FIFO( - depth: Int = FIFOTestParameters.fifoDepth, - width: Int = FIFOTestParameters.fifoWidth, + depth: Int, + width: Int, tagName: String = "" ) extends Module with RequireAsyncReset { @@ -32,10 +32,3 @@ class FIFO( } } - -object FIFO extends App { - emitVerilog( - new (FIFO), - Array("--target-dir", "generated/streamer") - ) -} diff --git a/hw/chisel/src/main/scala/snax/streamer/Parameters.scala b/hw/chisel/src/main/scala/snax/streamer/Parameters.scala index 3298d284c..18422ad90 100644 --- a/hw/chisel/src/main/scala/snax/streamer/Parameters.scala +++ b/hw/chisel/src/main/scala/snax/streamer/Parameters.scala @@ -26,9 +26,9 @@ trait CommonParams { * The bit width of the address. 
*/ case class TemporalAddrGenUnitParams( - loopDim: Int = TemporalAddrGenUnitTestParameters.loopDim, - loopBoundWidth: Int = TemporalAddrGenUnitTestParameters.loopBoundWidth, - addrWidth: Int = TemporalAddrGenUnitTestParameters.addrWidth + loopDim: Int, + loopBoundWidth: Int, + addrWidth: Int ) /** This class represents all the parameters for the Spatial Address Generation @@ -41,9 +41,9 @@ case class TemporalAddrGenUnitParams( * The bit width of the address. */ case class SpatialAddrGenUnitParams( - loopDim: Int = SpatialAddrGenUnitTestParameters.loopDim, - loopBounds: Seq[Int] = SpatialAddrGenUnitTestParameters.loopBounds, - addrWidth: Int = SpatialAddrGenUnitTestParameters.addrWidth + loopDim: Int, + loopBounds: Seq[Int], + addrWidth: Int ) /** This class represents all the parameters for the Data Mover (including Data @@ -63,11 +63,11 @@ case class SpatialAddrGenUnitParams( * FIFO width */ case class DataMoverParams( - tcdmPortsNum: Int = DataMoverTestParameters.tcdmPortsNum, - spatialBounds: Seq[Int] = DataMoverTestParameters.spatialBounds, - spatialDim: Int = DataMoverTestParameters.spatialDim, - elementWidth: Int = DataMoverTestParameters.elementWidth, - fifoWidth: Int = DataMoverTestParameters.fifoWidth + tcdmPortsNum: Int, + spatialBounds: Seq[Int], + spatialDim: Int, + elementWidth: Int, + fifoWidth: Int ) extends CommonParams /** FIFO parameters @@ -173,15 +173,12 @@ trait HasStreamerInferredParams extends HasStreamerCoreParams { * default value of these parameters is from the StreamerTestConstant object */ case class StreamerParams( - temporalAddrGenUnitParams: TemporalAddrGenUnitParams = - StreamerTestConstant.temporalAddrGenUnitParams, - stationarity: Seq[Int] = StreamerTestConstant.stationarity, - dataReaderParams: Seq[DataMoverParams] = - StreamerTestConstant.dataReaderParams, - dataWriterParams: Seq[DataMoverParams] = - StreamerTestConstant.dataWriterParams, - fifoReaderParams: Seq[FIFOParams] = StreamerTestConstant.fifoReaderParams, - 
fifoWriterParams: Seq[FIFOParams] = StreamerTestConstant.fifoWriterParams, - tagName: String = StreamerTestConstant.tagName + temporalAddrGenUnitParams: TemporalAddrGenUnitParams, + stationarity: Seq[Int], + dataReaderParams: Seq[DataMoverParams], + dataWriterParams: Seq[DataMoverParams], + fifoReaderParams: Seq[FIFOParams], + fifoWriterParams: Seq[FIFOParams], + tagName: String = "" ) extends HasStreamerCoreParams with HasStreamerInferredParams diff --git a/hw/chisel/src/main/scala/snax/streamer/SpatialAddrGenUnit.scala b/hw/chisel/src/main/scala/snax/streamer/SpatialAddrGenUnit.scala index 454ca02b6..46495b870 100644 --- a/hw/chisel/src/main/scala/snax/streamer/SpatialAddrGenUnit.scala +++ b/hw/chisel/src/main/scala/snax/streamer/SpatialAddrGenUnit.scala @@ -79,7 +79,7 @@ trait WithSpatialLoopIndices { * The bit width of the address. */ class SpatialAddrGenUnit( - params: SpatialAddrGenUnitParams = SpatialAddrGenUnitParams(), + params: SpatialAddrGenUnitParams, tagName: String = "" ) extends Module with RequireAsyncReset @@ -157,11 +157,3 @@ class SpatialAddrGenUnit( ) } - -// Scala main function for generating system verilog file for the SpatialAddrGenUnit module -object SpatialAddrGenUnit extends App { - emitVerilog( - new SpatialAddrGenUnit(SpatialAddrGenUnitParams()), - Array("--target-dir", "generated/streamer") - ) -} diff --git a/hw/chisel/src/main/scala/snax/streamer/Streamer.scala b/hw/chisel/src/main/scala/snax/streamer/Streamer.scala index b7361ac41..f748b2c64 100644 --- a/hw/chisel/src/main/scala/snax/streamer/Streamer.scala +++ b/hw/chisel/src/main/scala/snax/streamer/Streamer.scala @@ -27,7 +27,7 @@ class DataFromAcceleratorX( // csr related io class StreamerCsrIO( - params: StreamerParams = StreamerParams() + params: StreamerParams ) extends Bundle { // configurations interface for a new data operation @@ -372,56 +372,3 @@ class Streamer( } } - -// Scala main function for generating test streamer system verilog file -object StreamerTester extends 
App { - emitVerilog( - new Streamer(StreamerParams()), - Array("--target-dir", "generated/streamer/tester") - ) -} - -// Scala main function for generating system verilog file for different accelerators -// including GEMM, Post-processing SIMD and MAC engine -object GemmStreamer extends App { - emitVerilog( - new Streamer( - StreamerParams( - temporalAddrGenUnitParams = - GeMMStreamerParameters.temporalAddrGenUnitParams, - fifoReaderParams = GeMMStreamerParameters.fifoReaderParams, - fifoWriterParams = GeMMStreamerParameters.fifoWriterParams, - stationarity = GeMMStreamerParameters.stationarity, - dataReaderParams = GeMMStreamerParameters.dataReaderParams, - dataWriterParams = GeMMStreamerParameters.dataWriterParams - ) - ), - Array("--target-dir", "generated/streamer/gemm") - ) -} - -object PostProcessingStreamer extends App { - emitVerilog( - new Streamer( - StreamerParams( - temporalAddrGenUnitParams = - PostProcessingStreamerParameters.temporalAddrGenUnitParams, - fifoReaderParams = PostProcessingStreamerParameters.fifoReaderParams, - fifoWriterParams = PostProcessingStreamerParameters.fifoWriterParams, - stationarity = PostProcessingStreamerParameters.stationarity, - dataReaderParams = PostProcessingStreamerParameters.dataReaderParams, - dataWriterParams = PostProcessingStreamerParameters.dataWriterParams - ) - ), - Array("--target-dir", "generated/streamer/pp") - ) -} - -object MacStreamer extends App { - emitVerilog( - new Streamer( - StreamerParams() - ), - Array("--target-dir", "generated/streamer/mac") - ) -} diff --git a/hw/chisel/src/main/scala/snax/streamer/StreamerInstanceParameters.scala b/hw/chisel/src/main/scala/snax/streamer/StreamerInstanceParameters.scala deleted file mode 100644 index e8616f18e..000000000 --- a/hw/chisel/src/main/scala/snax/streamer/StreamerInstanceParameters.scala +++ /dev/null @@ -1,151 +0,0 @@ -package snax.streamer - -import chisel3._ -import chisel3.util._ - -// streamer parameters for the GEMM Accelerator -object 
GeMMStreamerParameters extends CommonParams { - - def temporalAddrGenUnitParams: TemporalAddrGenUnitParams = - TemporalAddrGenUnitParams( - loopDim = 3, - loopBoundWidth = 8, - addrWidth - ) - - def fifoReaderParams: Seq[FIFOParams] = Seq( - FIFOParams(512, 2), - FIFOParams(512, 2) - ) - - def fifoWriterParams: Seq[FIFOParams] = Seq(FIFOParams(2048, 2)) - - def dataReaderParams: Seq[DataMoverParams] = Seq( - DataMoverParams( - tcdmPortsNum = 8, - spatialBounds = Seq(8, 8), - spatialDim = 2, - elementWidth = 8, - fifoWidth = fifoReaderParams(0).width - ), - DataMoverParams( - tcdmPortsNum = 8, - spatialBounds = Seq(8, 8), - spatialDim = 2, - elementWidth = 8, - fifoWidth = fifoReaderParams(1).width - ) - ) - - def dataWriterParams: Seq[DataMoverParams] = Seq( - DataMoverParams( - tcdmPortsNum = 32, - spatialBounds = Seq(8, 8), - spatialDim = 2, - elementWidth = 32, - fifoWidth = fifoWriterParams(0).width - ) - ) - - def stationarity = Seq(0, 0, 1) - -} - -// streamer parameters for the Post-processing SIMD Accelerator -object PostProcessingStreamerParameters extends CommonParams { - - def temporalAddrGenUnitParams: TemporalAddrGenUnitParams = - TemporalAddrGenUnitParams( - loopDim = 2, - loopBoundWidth = 8, - addrWidth - ) - - def fifoReaderParams: Seq[FIFOParams] = Seq( - FIFOParams(2048, 2) - ) - - def fifoWriterParams: Seq[FIFOParams] = Seq(FIFOParams(512, 2)) - - def dataReaderParams: Seq[DataMoverParams] = Seq( - DataMoverParams( - tcdmPortsNum = 32, - spatialBounds = Seq(8, 8), - spatialDim = 2, - elementWidth = 32, - fifoWidth = fifoReaderParams(0).width - ) - ) - - def dataWriterParams: Seq[DataMoverParams] = Seq( - DataMoverParams( - tcdmPortsNum = 8, - spatialBounds = Seq(8, 8), - spatialDim = 2, - elementWidth = 8, - fifoWidth = fifoWriterParams(0).width - ) - ) - - def stationarity = Seq(0, 0) - -} - -object MacStreamerParameters extends CommonParams { - - def MacScalingFactor = 4 - - def temporalAddrGenUnitParams: TemporalAddrGenUnitParams = - 
TemporalAddrGenUnitParams( - loopDim = 1, - loopBoundWidth = 8, - addrWidth - ) - - def fifoReaderParams: Seq[FIFOParams] = Seq( - FIFOParams(64 * MacScalingFactor, 2), - FIFOParams(64 * MacScalingFactor, 2), - FIFOParams(64, 2) - ) - - def fifoWriterParams: Seq[FIFOParams] = Seq( - FIFOParams(64 * MacScalingFactor, 2) - ) - - def stationarity = Seq(0, 0, 1, 1) - - def dataReaderParams: Seq[DataMoverParams] = Seq( - DataMoverParams( - tcdmPortsNum = 1 * MacScalingFactor, - spatialBounds = Seq(2 * MacScalingFactor), - spatialDim = 1, - elementWidth = 32, - fifoWidth = fifoReaderParams(0).width - ), - DataMoverParams( - tcdmPortsNum = 1 * MacScalingFactor, - spatialBounds = Seq(2 * MacScalingFactor), - spatialDim = 1, - elementWidth = 32, - fifoWidth = fifoReaderParams(1).width - ), - DataMoverParams( - tcdmPortsNum = 1, - spatialBounds = Seq(2), - spatialDim = 1, - elementWidth = 32, - fifoWidth = fifoReaderParams(2).width - ) - ) - - def dataWriterParams: Seq[DataMoverParams] = Seq( - DataMoverParams( - tcdmPortsNum = 1 * MacScalingFactor, - spatialBounds = Seq(2 * MacScalingFactor), - spatialDim = 1, - elementWidth = 32, - fifoWidth = fifoWriterParams(0).width - ) - ) - -} diff --git a/hw/chisel/src/main/scala/snax/streamer/StreamerTop.scala b/hw/chisel/src/main/scala/snax/streamer/StreamerTop.scala index 0fad72900..9602fbf89 100644 --- a/hw/chisel/src/main/scala/snax/streamer/StreamerTop.scala +++ b/hw/chisel/src/main/scala/snax/streamer/StreamerTop.scala @@ -14,7 +14,7 @@ import chisel3.experimental.{prefix, noPrefix} * the parameters class instantiation for the streamer top module */ class StreamerTopIO( - params: StreamerParams = StreamerParams(), + params: StreamerParams, csrAddrWidth: Int ) extends Bundle { @@ -34,7 +34,7 @@ class StreamerTopIO( * the parameters class instantiation for the streamer top module */ class StreamerTop( - params: StreamerParams = StreamerParams() + params: StreamerParams ) extends Module with RequireAsyncReset { @@ -153,83 +153,3 
@@ class StreamerTop( io.data <> streamer.io.data } - -// Scala main function for generating test streamerTop system verilog file -object StreamerTopTester extends App { - emitVerilog( - new StreamerTop( - StreamerParams( - temporalAddrGenUnitParams = - StreamerTestConstant.temporalAddrGenUnitParams, - fifoReaderParams = StreamerTestConstant.fifoReaderParams, - fifoWriterParams = StreamerTestConstant.fifoWriterParams, - stationarity = StreamerTestConstant.stationarity, - dataReaderParams = StreamerTestConstant.dataReaderParams, - dataWriterParams = StreamerTestConstant.dataWriterParams - ) - ), - Array("--target-dir", "generated/streamertop/tester") - ) -} - -// streamertop for GEMM -object GeMMStreamerTop { - def main(args: Array[String]): Unit = { - val outPath = args.headOption.getOrElse("generated/streamertop/gemm") - emitVerilog( - new StreamerTop( - StreamerParams( - temporalAddrGenUnitParams = - GeMMStreamerParameters.temporalAddrGenUnitParams, - fifoReaderParams = GeMMStreamerParameters.fifoReaderParams, - fifoWriterParams = GeMMStreamerParameters.fifoWriterParams, - stationarity = GeMMStreamerParameters.stationarity, - dataReaderParams = GeMMStreamerParameters.dataReaderParams, - dataWriterParams = GeMMStreamerParameters.dataWriterParams, - tagName = "GeMM" - ) - ), - Array("--target-dir", outPath) - ) - } -} - -// streamertop for PP-SIMD -object PostProcessingStreamerTop { - def main(args: Array[String]): Unit = { - val outPath = args.headOption.getOrElse("generated/streamertop/simd") - emitVerilog( - new StreamerTop( - StreamerParams( - temporalAddrGenUnitParams = - PostProcessingStreamerParameters.temporalAddrGenUnitParams, - fifoReaderParams = PostProcessingStreamerParameters.fifoReaderParams, - fifoWriterParams = PostProcessingStreamerParameters.fifoWriterParams, - stationarity = PostProcessingStreamerParameters.stationarity, - dataReaderParams = PostProcessingStreamerParameters.dataReaderParams, - dataWriterParams = 
PostProcessingStreamerParameters.dataWriterParams, - tagName = "PostProcessingSIMD" - ) - ), - Array("--target-dir", outPath) - ) - } -} - -// streamertop for MAC -object MacStreamerTop extends App { - emitVerilog( - new StreamerTop( - StreamerParams( - temporalAddrGenUnitParams = - MacStreamerParameters.temporalAddrGenUnitParams, - fifoReaderParams = MacStreamerParameters.fifoReaderParams, - fifoWriterParams = MacStreamerParameters.fifoWriterParams, - stationarity = MacStreamerParameters.stationarity, - dataReaderParams = MacStreamerParameters.dataReaderParams, - dataWriterParams = MacStreamerParameters.dataWriterParams - ) - ), - Array("--target-dir", "generated/streamertop/mac") - ) -} diff --git a/hw/chisel/src/main/scala/snax/streamer/TemporalAddrGenUnit.scala b/hw/chisel/src/main/scala/snax/streamer/TemporalAddrGenUnit.scala index 51df5e900..c8d9db388 100644 --- a/hw/chisel/src/main/scala/snax/streamer/TemporalAddrGenUnit.scala +++ b/hw/chisel/src/main/scala/snax/streamer/TemporalAddrGenUnit.scala @@ -67,7 +67,7 @@ class TemporalAddrGenUnitIO( * The bit width of the addresses. 
*/ class TemporalAddrGenUnit( - params: TemporalAddrGenUnitParams = TemporalAddrGenUnitParams(), + params: TemporalAddrGenUnitParams, tagName: String = "" ) extends Module with RequireAsyncReset { @@ -207,11 +207,3 @@ class TemporalAddrGenUnit( io.done := addr_gen_finish } - -// Scala main function for generating system verilog file for the TemporalAddrGenUnit module -object TemporalAddrGenUnit extends App { - emitVerilog( - new TemporalAddrGenUnit(TemporalAddrGenUnitParams()), - Array("--target-dir", "generated/streamer") - ) -} diff --git a/hw/chisel/src/test/scala/snax/csr_manager/CsrManagerGenerate.scala b/hw/chisel/src/test/scala/snax/csr_manager/CsrManagerGenerate.scala new file mode 100644 index 000000000..464d50b1e --- /dev/null +++ b/hw/chisel/src/test/scala/snax/csr_manager/CsrManagerGenerate.scala @@ -0,0 +1,16 @@ +package snax.csr_manager + +import chisel3._ +import org.scalatest.flatspec.AnyFlatSpec +import chiseltest._ + +class CsrManagerTopGenerate +extends AnyFlatSpec { + + emitVerilog( + new CsrManager(10, 32), + Array("--target-dir", "generated/") + ) + +} + diff --git a/hw/chisel/src/main/scala/snax/csr_manager/CsrManagerTestParameters.scala b/hw/chisel/src/test/scala/snax/csr_manager/CsrManagerTestParameters.scala similarity index 100% rename from hw/chisel/src/main/scala/snax/csr_manager/CsrManagerTestParameters.scala rename to hw/chisel/src/test/scala/snax/csr_manager/CsrManagerTestParameters.scala diff --git a/hw/chisel/src/test/scala/snax/streamer/DataReaderTest.scala b/hw/chisel/src/test/scala/snax/streamer/DataReaderTest.scala index 214365b50..ebf050daf 100644 --- a/hw/chisel/src/test/scala/snax/streamer/DataReaderTest.scala +++ b/hw/chisel/src/test/scala/snax/streamer/DataReaderTest.scala @@ -14,7 +14,7 @@ class DataReaderTest with ChiselScalatestTester with Matchers { "DUT" should "pass" in { - test(new DataReader(DataMoverParams())).withAnnotations( + test(new DataReader(TestParameters.dataMover)).withAnnotations( 
Seq(WriteVcdAnnotation) ) { dut => dut.clock.step(5) @@ -23,7 +23,7 @@ class DataReaderTest dut.io.spatialStrides_csr_i.bits(1).poke(8) dut.io.spatialStrides_csr_i.valid.poke(1) dut.io.data_fifo_o.ready.poke(1) - for (i <- 0 until DataMoverTestParameters.tcdmPortsNum) { + for (i <- 0 until TestParameters.dataMover.tcdmPortsNum) { dut.io.tcdm_req(i).ready.poke(1) dut.io.tcdm_rsp(i).bits.data.poke(1) } diff --git a/hw/chisel/src/test/scala/snax/streamer/DataWriterTest.scala b/hw/chisel/src/test/scala/snax/streamer/DataWriterTest.scala index 65f73e158..1881cc038 100644 --- a/hw/chisel/src/test/scala/snax/streamer/DataWriterTest.scala +++ b/hw/chisel/src/test/scala/snax/streamer/DataWriterTest.scala @@ -14,7 +14,7 @@ class DataWriterTest with ChiselScalatestTester with Matchers { "DUT" should "pass" in { - test(new DataWriter(DataMoverParams())).withAnnotations( + test(new DataWriter(TestParameters.dataMover)).withAnnotations( Seq(WriteVcdAnnotation) ) { dut => dut.clock.step(5) @@ -23,7 +23,7 @@ class DataWriterTest dut.io.spatialStrides_csr_i.bits(1).poke(8) dut.io.spatialStrides_csr_i.valid.poke(1) dut.io.data_fifo_i.valid.poke(1) - for (i <- 0 until DataMoverTestParameters.tcdmPortsNum) { + for (i <- 0 until TestParameters.dataMover.tcdmPortsNum) { dut.io.tcdm_req(i).ready.poke(1) } dut.clock.step(5) diff --git a/hw/chisel/src/test/scala/snax/streamer/FIFOTest.scala b/hw/chisel/src/test/scala/snax/streamer/FIFOTest.scala index 73b4651c5..1478a705a 100644 --- a/hw/chisel/src/test/scala/snax/streamer/FIFOTest.scala +++ b/hw/chisel/src/test/scala/snax/streamer/FIFOTest.scala @@ -9,7 +9,9 @@ import org.scalatest.Tag class FIFOTest extends AnyFlatSpec with ChiselScalatestTester with Matchers { "DUT" should "pass" in { - test(new FIFO) + test(new FIFO( + width=TestParameters.fifo.width, + depth=TestParameters.fifo.depth)) .withAnnotations( Seq(WriteVcdAnnotation) ) { dut => diff --git a/hw/chisel/src/test/scala/snax/streamer/SpatialAddrGenUnitTest.scala 
b/hw/chisel/src/test/scala/snax/streamer/SpatialAddrGenUnitTest.scala index d759c8d24..8c88d6e47 100644 --- a/hw/chisel/src/test/scala/snax/streamer/SpatialAddrGenUnitTest.scala +++ b/hw/chisel/src/test/scala/snax/streamer/SpatialAddrGenUnitTest.scala @@ -14,20 +14,20 @@ class SpatialAddrGenUnitTest with ChiselScalatestTester with Matchers { "DUT" should "pass" in { - test(new SpatialAddrGenUnit(SpatialAddrGenUnitParams())).withAnnotations( + test(new SpatialAddrGenUnit(TestParameters.spatialAddrGenUnit)).withAnnotations( Seq(WriteVcdAnnotation) ) { dut => dut.clock.step(5) // random config generation val strides = - Seq.fill(SpatialAddrGenUnitTestParameters.loopDim)( + Seq.fill(TestParameters.spatialAddrGenUnit.loopDim)( ( scala.util.Random.nextInt(10) ) ) // sending these configuration to the dut - for (i <- 0 until SpatialAddrGenUnitTestParameters.loopDim) { + for (i <- 0 until TestParameters.spatialAddrGenUnit.loopDim) { val stride = strides(i).U dut.io.strides_i(i).poke(stride) } @@ -38,11 +38,11 @@ class SpatialAddrGenUnitTest // check the result (for loopDim = 2) val ptr_0 = 16 - for (i <- 0 until SpatialAddrGenUnitTestParameters.loopBounds(0)) { - for (j <- 0 until SpatialAddrGenUnitTestParameters.loopBounds(1)) { + for (i <- 0 until TestParameters.spatialAddrGenUnit.loopBounds(0)) { + for (j <- 0 until TestParameters.spatialAddrGenUnit.loopBounds(1)) { val ptr = ptr_0 + i * strides(0) + j * strides(1) dut.io - .ptr_o(i + j * SpatialAddrGenUnitTestParameters.loopBounds(0)) + .ptr_o(i + j * TestParameters.spatialAddrGenUnit.loopBounds(0)) .expect(ptr) } } diff --git a/hw/chisel/src/test/scala/snax/streamer/StreamerTest.scala b/hw/chisel/src/test/scala/snax/streamer/StreamerTest.scala index 014e2efc7..b17aa6dd5 100644 --- a/hw/chisel/src/test/scala/snax/streamer/StreamerTest.scala +++ b/hw/chisel/src/test/scala/snax/streamer/StreamerTest.scala @@ -12,7 +12,7 @@ class StreamerTest with ChiselScalatestTester with Matchers { "DUT" should "pass" in { - test(new 
Streamer(StreamerParams())) + test(new Streamer(TestParameters.streamer)) .withAnnotations( Seq(WriteVcdAnnotation) ) { dut => @@ -20,12 +20,12 @@ class StreamerTest // give valid transaction config dut.io.csr.valid.poke(1.B) - for (i <- 0 until StreamerParams().temporalDim) { + for (i <- 0 until TestParameters.streamer.temporalDim) { dut.io.csr.bits.loopBounds_i(i).poke(2) } // give the proper spatial strides so that is a aligned in one TCDM bank - for (i <- 0 until StreamerParams().dataMoverNum) { + for (i <- 0 until TestParameters.streamer.dataMoverNum) { dut.io.csr.bits.spatialStrides_csr_i(i)(0).poke(4) } @@ -35,37 +35,37 @@ class StreamerTest dut.clock.step(5) // give tcdm ports signals, no contention scene - for (i <- 0 until StreamerParams().dataReaderTcdmPorts.sum) { + for (i <- 0 until TestParameters.streamer.dataReaderTcdmPorts.sum) { dut.io.data.tcdm_req(i).ready.poke(1.B) dut.io.data.tcdm_rsp(i).valid.poke(1.B) } dut.clock.step(10) // give accelerator signals - for (i <- 0 until StreamerParams().dataReaderNum) { + for (i <- 0 until TestParameters.streamer.dataReaderNum) { dut.io.data.streamer2accelerator.data(i).ready.poke(1.B) } dut.clock.step(10) - for (i <- 0 until StreamerParams().dataReaderTcdmPorts.sum) { + for (i <- 0 until TestParameters.streamer.dataReaderTcdmPorts.sum) { dut.io.data.tcdm_req(i).ready.poke(0.B) dut.io.data.tcdm_rsp(i).valid.poke(0.B) } // mimic accelerator gives valid data dut.clock.step(10) - for (i <- 0 until StreamerParams().dataWriterNum) { + for (i <- 0 until TestParameters.streamer.dataWriterNum) { dut.io.data.accelerator2streamer.data(i).valid.poke(1.B) } dut.clock.step(4) - for (i <- 0 until StreamerParams().dataWriterNum) { + for (i <- 0 until TestParameters.streamer.dataWriterNum) { dut.io.data.accelerator2streamer.data(i).valid.poke(0.B) } // mimic tcdm is ready for write request - for (i <- 0 until StreamerParams().dataWriterTcdmPorts.sum) { + for (i <- 0 until TestParameters.streamer.dataWriterTcdmPorts.sum) { 
dut.io.data - .tcdm_req(i + StreamerParams().dataReaderTcdmPorts.sum) + .tcdm_req(i + TestParameters.streamer.dataReaderTcdmPorts.sum) .ready .poke(1.B) } diff --git a/hw/chisel/src/main/scala/snax/streamer/StreamerTestParameter.scala b/hw/chisel/src/test/scala/snax/streamer/StreamerTestParameter.scala similarity index 63% rename from hw/chisel/src/main/scala/snax/streamer/StreamerTestParameter.scala rename to hw/chisel/src/test/scala/snax/streamer/StreamerTestParameter.scala index 9d4e2993d..b1369d4ec 100644 --- a/hw/chisel/src/main/scala/snax/streamer/StreamerTestParameter.scala +++ b/hw/chisel/src/test/scala/snax/streamer/StreamerTestParameter.scala @@ -5,31 +5,6 @@ import chisel3.util._ /* the meaning of these testing parameters can be found at Parameter.scala */ -object TemporalAddrGenUnitTestParameters { - def loopDim = 3 - def loopBoundWidth = 8 - def addrWidth = 32 - -} - -object SpatialAddrGenUnitTestParameters { - def loopBounds = Seq(8, 8) - def loopDim = loopBounds.length - def addrWidth = 32 -} - -object DataMoverTestParameters { - def tcdmPortsNum = 8 - def tcdmDataWidth = 64 - def spatialBounds = Seq(8, 8) - def addrWidth = 32 - def fifoWidth = 512 - def elementWidth = 8 - - def spatialDim = spatialBounds.length - -} - object StreamerTestConstant extends CommonParams { def MacScalingFactor = 4 @@ -90,7 +65,40 @@ object StreamerTestConstant extends CommonParams { def tagName: String = "" } -object FIFOTestParameters { - def fifoWidth = 512 - def fifoDepth = 4 +object TestParameters { + val streamer = StreamerParams( + temporalAddrGenUnitParams = StreamerTestConstant.temporalAddrGenUnitParams, + stationarity = StreamerTestConstant.stationarity, + dataReaderParams = StreamerTestConstant.dataReaderParams, + dataWriterParams = StreamerTestConstant.dataWriterParams, + fifoReaderParams = StreamerTestConstant.fifoReaderParams, + fifoWriterParams = StreamerTestConstant.fifoWriterParams, + tagName = "abc" + ) + + val temporalAddrGenUnit = 
TemporalAddrGenUnitParams( + loopDim = 3, + loopBoundWidth = 8, + addrWidth = 32 + ) + + val spatialAddrGenUnit = SpatialAddrGenUnitParams ( + loopBounds = Seq(8, 8), + loopDim = 2, + addrWidth = 32 + ) + + val dataMover = DataMoverParams ( + tcdmPortsNum = 8, + spatialBounds = Seq(8, 8), + fifoWidth = 512, + elementWidth = 8, + spatialDim = 2, + ) + + val fifo = FIFOParams ( + width = 512, + depth = 4 + ) } + diff --git a/hw/chisel/src/test/scala/snax/streamer/StreamerTopGenerate.scala b/hw/chisel/src/test/scala/snax/streamer/StreamerTopGenerate.scala new file mode 100644 index 000000000..f45778bd6 --- /dev/null +++ b/hw/chisel/src/test/scala/snax/streamer/StreamerTopGenerate.scala @@ -0,0 +1,16 @@ +package snax.streamer + +import chisel3._ +import org.scalatest.flatspec.AnyFlatSpec +import chiseltest._ + +class StreamerTopGenerate +extends AnyFlatSpec { + + emitVerilog( + new StreamerTop(TestParameters.streamer), + Array("--target-dir", "generated/") + ) + +} + diff --git a/hw/chisel/src/test/scala/snax/streamer/StreamerTopTest.scala b/hw/chisel/src/test/scala/snax/streamer/StreamerTopTest.scala index 600dc05a8..4f2f3bb2f 100644 --- a/hw/chisel/src/test/scala/snax/streamer/StreamerTopTest.scala +++ b/hw/chisel/src/test/scala/snax/streamer/StreamerTopTest.scala @@ -13,11 +13,11 @@ class StreamerTopTest with ChiselScalatestTester with Matchers { "DUT" should "pass" in { - test(new StreamerTop(new StreamerParams())) + test(new StreamerTop(TestParameters.streamer)) .withAnnotations( Seq(WriteVcdAnnotation) ) { dut => - dut.clock.step(5) + dut.clock.step(5) // write csr helper function def write_csr(addr: Int, data: Int) = { @@ -66,7 +66,7 @@ class StreamerTopTest // give valid transaction config // temporal loop bound - val temporal_loop_bound = 20 + val temporal_loop_bound = 20 write_csr(0, temporal_loop_bound) // temporal loop strides @@ -107,39 +107,39 @@ class StreamerTopTest dut.clock.step(5) // give tcdm ports signals, no contention scene - for (i <- 0 until 
StreamerParams().dataReaderTcdmPorts.sum) { + for (i <- 0 until TestParameters.streamer.dataReaderTcdmPorts.sum) { dut.io.data.tcdm_req(i).ready.poke(1.B) dut.io.data.tcdm_rsp(i).valid.poke(1.B) } // give accelerator ready to get input signals - for (i <- 0 until StreamerParams().dataReaderNum) { + for (i <- 0 until TestParameters.streamer.dataReaderNum) { dut.io.data.streamer2accelerator.data(i).ready.poke(1.B) } // wait for temporal_loop_bound cycles dut.clock.step(temporal_loop_bound * 2) - for (i <- 0 until StreamerParams().dataReaderTcdmPorts.sum) { + for (i <- 0 until TestParameters.streamer.dataReaderTcdmPorts.sum) { dut.io.data.tcdm_req(i).ready.poke(0.B) dut.io.data.tcdm_rsp(i).valid.poke(0.B) } // mimic accelerator gives valid data - for (i <- 0 until StreamerParams().dataWriterNum) { + for (i <- 0 until TestParameters.streamer.dataWriterNum) { dut.io.data.accelerator2streamer.data(i).valid.poke(1.B) } // mimic tcdm is ready for write request - for (i <- 0 until StreamerParams().dataWriterTcdmPorts.sum) { + for (i <- 0 until TestParameters.streamer.dataWriterTcdmPorts.sum) { dut.io.data - .tcdm_req(i + StreamerParams().dataReaderTcdmPorts.sum) + .tcdm_req(i + TestParameters.streamer.dataReaderTcdmPorts.sum) .ready .poke(1.B) } // wait for temporal_loop_bound cycles dut.clock.step(temporal_loop_bound * 2) - for (i <- 0 until StreamerParams().dataWriterNum) { + for (i <- 0 until TestParameters.streamer.dataWriterNum) { dut.io.data.accelerator2streamer.data(i).valid.poke(0.B) } diff --git a/hw/chisel/src/test/scala/snax/streamer/TemporalAddrGenUnitTest.scala b/hw/chisel/src/test/scala/snax/streamer/TemporalAddrGenUnitTest.scala index 2ea89da9a..913472910 100644 --- a/hw/chisel/src/test/scala/snax/streamer/TemporalAddrGenUnitTest.scala +++ b/hw/chisel/src/test/scala/snax/streamer/TemporalAddrGenUnitTest.scala @@ -15,7 +15,7 @@ class TemporalAddrGenUnitTest with Matchers with WithSpatialLoopIndices { "DUT" should "pass" in { - test(new 
TemporalAddrGenUnit(TemporalAddrGenUnitParams())).withAnnotations( + test(new TemporalAddrGenUnit(TestParameters.temporalAddrGenUnit)).withAnnotations( Seq(WriteVcdAnnotation) ) { dut => def test_once() = { @@ -23,20 +23,20 @@ class TemporalAddrGenUnitTest // random config generation val base_ptr = scala.util.Random.nextInt(100) val loopBounds = - Seq.fill(TemporalAddrGenUnitTestParameters.loopDim)( + Seq.fill(TestParameters.temporalAddrGenUnit.loopDim)( ( scala.util.Random.nextInt(5) + 1 ) ) val strides = - Seq.fill(TemporalAddrGenUnitTestParameters.loopDim)( + Seq.fill(TestParameters.temporalAddrGenUnit.loopDim)( ( scala.util.Random.nextInt(10) ) ) // sending these configuration to the dut - for (i <- 0 until TemporalAddrGenUnitTestParameters.loopDim) { + for (i <- 0 until TestParameters.temporalAddrGenUnit.loopDim) { val lb = loopBounds(i).U val ts = strides(i).U dut.io.loopBounds_i.bits(i).poke(lb) @@ -65,7 +65,7 @@ class TemporalAddrGenUnitTest while (dut.io.done.peekBoolean() == false) { val indices = genSpatialLoopIndices( - TemporalAddrGenUnitTestParameters.loopDim, + TestParameters.temporalAddrGenUnit.loopDim, loopBounds, counter )