Skip to content

Commit

Permalink
Add bare post-processing simd (#3)
Browse files Browse the repository at this point in the history
* bare simd init

* add lanelen para

* address commits
  • Loading branch information
xiaoling-yi authored Jan 19, 2024
1 parent ee29d73 commit 686870c
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/main/scala/simd/Parameter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ object SIMDConstant {
def constantType = 8
def constantMulType = 32

// SIMD parallelism
def laneLen = 64
}
102 changes: 102 additions & 0 deletions src/main/scala/simd/SIMD.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package simd

import chisel3._
import chisel3.util._
import chisel3.VecInit

// post-processing SIMD data interface
// one big input port, one big output port
class SIMDDataIO extends Bundle {
  // packed input bus carrying laneLen elements of inputType bits each;
  // flipped, so this side receives the data and drives ready
  val input_i = Flipped(
    Decoupled(
      UInt((SIMDConstant.inputType * SIMDConstant.laneLen).W)
    )
  )

  // packed output bus carrying laneLen elements of outputType bits each;
  // this side drives valid and bits
  val out_o =
    Decoupled(UInt((SIMDConstant.outputType * SIMDConstant.laneLen).W))
}

// post-processing SIMD input and output declaration
class SIMDIO extends Bundle {
  // configuration channel: a single PECtrl is shared by every PE
  val ctrl = Flipped(Decoupled(new PECtrl))

  // handshaked packed data input and output
  val data = new SIMDDataIO
}

// post-processing SIMD module
// This module implements, in parallel across all lanes, the specification at https://gist.github.com/jorendumoulin/83352a1e84501ec4a7b3790461fee2bf
/** Post-processing SIMD: `laneLen` PEs working side by side on one packed bus.
  *
  * Lane `i` reads bits `[(i + 1) * inputType - 1 : i * inputType]` of the
  * input bus and drives the element at the same position `i` of the output
  * bus (lane 0 in the least significant bits).
  *
  * @param laneLen
  *   number of parallel PEs; must equal `SIMDConstant.laneLen` because the
  *   widths of the IO bundles are derived from that constant
  */
class SIMD(laneLen: Int = SIMDConstant.laneLen)
    extends Module
    with RequireAsyncReset {
  // The IO buses are sized by SIMDConstant.laneLen. Any other lane count
  // would either leave output elements undriven or slice past the input bus,
  // so reject it up front with a readable message.
  require(
    laneLen == SIMDConstant.laneLen,
    s"laneLen ($laneLen) must equal SIMDConstant.laneLen (${SIMDConstant.laneLen})"
  )

  val io = IO(new SIMDIO())

  // generating parallel PEs
  val lane = Seq.fill(laneLen)(Module(new PE()))

  // control csr registers for storing the control data
  val ctrl_csr = Reg(new PECtrl())

  // result from different PEs
  val result = Wire(Vec(laneLen, SInt(SIMDConstant.outputType.W)))

  // storing the result in case the output needs to be held for multiple cycles
  val out_reg = RegInit(0.U((laneLen * SIMDConstant.outputType).W))

  // high while a previously produced output is still waiting for the receiver
  val keep_output = RegInit(0.B)

  // when config valid, store the configuration for later computation
  when(io.ctrl.valid) {
    ctrl_csr := io.ctrl.bits
  }

  // always ready for configuration
  io.ctrl.ready := 1.B

  // new input is accepted unless the last output is still being sent
  val accept_input = !keep_output
  io.data.input_i.ready := accept_input

  // An input element is only transferred when valid and ready are both high.
  // Gating the PE valid with ready keeps a not-yet-accepted input from
  // producing a result that would overwrite the stalled output in out_reg.
  val input_fire = io.data.input_i.valid && accept_input

  // give each PE the right control signal and data slice,
  // and collect the result of each PE
  for (i <- 0 until laneLen) {
    val lo = i * SIMDConstant.inputType
    val hi = lo + SIMDConstant.inputType - 1

    lane(i).io.ctrl_i := ctrl_csr
    lane(i).io.input_i := io.data.input_i.bits(hi, lo).asSInt
    lane(i).io.valid_i := input_fire
    result(i) := lane(i).io.out_o
  }

  // Cat places its first argument in the most significant bits, so the
  // results are reversed to put lane 0 in the least significant bits,
  // matching the lane order used when slicing the input bus.
  val packed_result = Cat(result.reverse)

  // if out valid but not ready, keep sending the output valid signal
  keep_output := io.data.out_o.valid & !io.data.out_o.ready

  // when the PEs produce a valid result, store it so it can be re-sent on
  // later cycles if the receiver side is not ready
  when(lane(0).io.valid_o) {
    out_reg := packed_result
  }

  // while holding a stalled output, send the stored result;
  // otherwise forward the freshly computed one
  io.data.out_o.bits := Mux(keep_output, out_reg, packed_result)

  // valid on a fresh PE result, or while the receiver has not yet accepted
  io.data.out_o.valid := lane(0).io.valid_o || keep_output

}

// Scala main function for generating system verilog file for the post-processing SIMD module
object SIMD extends App {
  // generator options: write the emitted files under generated/simd
  private val generatorArgs = Array("--target-dir", "generated/simd")

  // elaborate the SIMD module with the default lane count and emit Verilog
  emitVerilog(new SIMD(SIMDConstant.laneLen), generatorArgs)
}
74 changes: 74 additions & 0 deletions src/test/scala/simd/SIMDTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package simd

import chisel3._
import org.scalatest.flatspec.AnyFlatSpec
import chiseltest._
import scala.math.BigInt
import org.scalatest.matchers.should.Matchers
import org.scalatest.Tag

// post-processing SIMD manually-generated random data test
// TODO: automate the generation and checking of a bunch of random (parallel) test data
/** Manual smoke test for the SIMD module.
  *
  * Programs one configuration, presents one packed input word as a valid
  * transfer and prints the two lowest output elements for manual inspection.
  * It makes no assertions on the numeric result.
  */
class SIMDManualTest
    extends AnyFlatSpec
    with ChiselScalatestTester
    with Matchers {
  "DUT" should "pass" in {
    test(new SIMD)
      .withAnnotations(
        Seq(WriteVcdAnnotation)
      ) { dut =>
        // function wrapper for sending the configuration and the input data to the SIMD
        def verify(
            input: BigInt,
            input_zp: Byte,
            output_zp: Byte,
            multiplier: Int,
            shift: Byte,
            max_int: Byte,
            min_int: Byte
        ): Unit = {
          dut.clock.step()

          // giving the configuration
          dut.io.ctrl.bits.input_zp_i.poke(input_zp)
          dut.io.ctrl.bits.output_zp_i.poke(output_zp)
          dut.io.ctrl.bits.multiplier_i.poke(multiplier)
          dut.io.ctrl.bits.shift_i.poke(shift)
          dut.io.ctrl.bits.max_int_i.poke(max_int)
          dut.io.ctrl.bits.min_int_i.poke(min_int)
          dut.io.ctrl.bits.double_round_i.poke(1)
          dut.io.ctrl.valid.poke(1.B)
          dut.clock.step()
          dut.io.ctrl.valid.poke(0)

          // giving input data as a real handshake: the input is marked valid
          // and the consumer side is ready to take the result
          dut.clock.step()
          dut.io.data.input_i.bits.poke(input)
          dut.io.data.input_i.valid.poke(1.B)
          dut.io.data.out_o.ready.poke(1.B)
          dut.clock.step()

          // manually check SIMD output: print the two lowest 8-bit elements
          val elemMask = (1 << 8) - 1
          val packed = dut.io.data.out_o.bits.peekInt()
          val out = packed & elemMask
          println(out)
          // shift the second element down so it prints as an 8-bit value
          val out1 = (packed >> 8) & elemMask
          println(out1)

          // end of the transfer
          dut.io.data.input_i.valid.poke(0.B)
          dut.clock.step()
        }

        // function to translate an integer to zero-padded hex, used for
        // packing two integers into one big BigInt
        def int2hex(width: Int, intValue: Int) = {
          val paddingChar = '0'
          f"$intValue%x".reverse.padTo(width, paddingChar).reverse
        }

        // random data test
        val firstInput =
          BigInt(int2hex(8, 267082502) + int2hex(8, 267082502), 16)
        verify(firstInput, -59, -118, 8192, 34, 127, -128)

        val secondInput =
          BigInt(int2hex(8, 71671912) + int2hex(8, 71671912), 16)
        verify(secondInput, -23, -126, 65536, 37, 127, -128)

      }
  }
}

0 comments on commit 686870c

Please sign in to comment.