-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* bare simd init * add lanelen para * address commits
- Loading branch information
1 parent
ee29d73
commit 686870c
Showing
3 changed files
with
178 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,4 +12,6 @@ object SIMDConstant { | |
def constantType = 8 | ||
def constantMulType = 32 | ||
|
||
// SIMD parallelism | ||
def laneLen = 64 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
package simd | ||
|
||
import chisel3._ | ||
import chisel3.util._ | ||
import chisel3.VecInit | ||
|
||
// Data-side interface of the post-processing SIMD:
// a single wide input bus and a single wide output bus, each carrying
// SIMDConstant.laneLen elements side by side.
class SIMDDataIO extends Bundle {
  // Builds the packed bus type: laneLen elements of `elemWidth` bits each.
  private def packedBus(elemWidth: Int) =
    UInt((SIMDConstant.laneLen * elemWidth).W)

  // Packed input elements; flipped ready/valid port, so this side consumes data.
  val input_i = Flipped(Decoupled(packedBus(SIMDConstant.inputType)))

  // Packed output elements; ready/valid port, so this side produces data.
  val out_o = Decoupled(packedBus(SIMDConstant.outputType))
}
|
||
// Top-level port list of the post-processing SIMD: one control channel plus
// the packed data channels.
class SIMDIO extends Bundle {
  // A single PECtrl configuration, received over a ready/valid channel and
  // shared by all PEs.
  val ctrl = Flipped(new DecoupledIO(new PECtrl))

  // Ready/valid input and output data buses.
  val data = new SIMDDataIO
}
|
||
// post-processing SIMD module
// This module implements this specification in parallel:
// https://gist.github.com/jorendumoulin/83352a1e84501ec4a7b3790461fee2bf
//
// `laneLen` PE instances share one stored control configuration. Lane i reads
// the i-th inputType-wide slice of the input bus (lane 0 = least significant
// bits) and drives the i-th outputType-wide slice of the output bus.
//
// @param laneLen number of parallel PEs; must equal SIMDConstant.laneLen
//                because the width of SIMDIO is derived from that constant
class SIMD(laneLen: Int = SIMDConstant.laneLen)
    extends Module
    with RequireAsyncReset {
  // The IO buses are sized with SIMDConstant.laneLen. Any other lane count
  // would either leave result lanes undriven or index past the input bus, so
  // reject it up front with a readable message.
  require(
    laneLen == SIMDConstant.laneLen,
    s"SIMD laneLen ($laneLen) must match SIMDConstant.laneLen " +
      s"(${SIMDConstant.laneLen}): the SIMDIO bus width is derived from it"
  )

  val io = IO(new SIMDIO())

  // generating parallel PEs
  val lane = Seq.fill(laneLen)(Module(new PE()))

  // control csr registers for storing the control data
  val ctrl_csr = Reg(new PECtrl())

  // result from different PEs
  val result = Wire(Vec(laneLen, SInt(SIMDConstant.outputType.W)))

  // storing the result in case the output needs to be held for several cycles
  val out_reg = RegInit(0.U((laneLen * SIMDConstant.outputType).W))

  // set while the receiver has not yet accepted the current output
  val keep_output = RegInit(0.B)

  // when config valid, store the configuration for later computation
  when(io.ctrl.valid) {
    ctrl_csr := io.ctrl.bits
  }

  // always ready for configuration
  io.ctrl.ready := 1.B

  // always ready for new input unless the last output is still being sent
  io.data.input_i.ready := !keep_output

  // An input word is only transferred when valid and ready are both high.
  // Feeding the PEs the raw valid would let them process a word that the
  // sender still considers pending (and will present again later).
  val input_fire = io.data.input_i.valid && io.data.input_i.ready

  // give each PE the shared control signal and its own slice of the input,
  // and collect the result of each PE
  for (i <- 0 until laneLen) {
    lane(i).io.ctrl_i := ctrl_csr
    lane(i).io.input_i := io.data.input_i
      .bits(
        (i + 1) * SIMDConstant.inputType - 1,
        i * SIMDConstant.inputType
      )
      .asSInt
    lane(i).io.valid_i := input_fire
    result(i) := lane(i).io.out_o
  }

  // Pack all lane results into one bus. Cat puts its FIRST element in the most
  // significant bits, so the Vec is reversed to keep lane 0 in the least
  // significant bits, matching the slicing of the input bus above.
  val packed_result = Cat(result.reverse)

  // if out valid but not ready, keep sending the output valid signal
  keep_output := io.data.out_o.valid & !io.data.out_o.ready

  // when the PEs produce valid data, store it so it can be re-sent later if
  // the receiver side is not ready
  when(lane(0).io.valid_o) {
    out_reg := packed_result
  }

  // while re-sending, drive the stored result; otherwise the fresh one
  io.data.out_o.bits := Mux(keep_output, out_reg, packed_result)

  // valid on the first cycle from the PEs, or while still waiting for the
  // receiver to accept
  io.data.out_o.valid := lane(0).io.valid_o || keep_output

}
|
||
// Entry point: elaborates the post-processing SIMD module with the default
// lane count and emits its Verilog into generated/simd.
object SIMD extends App {
  private val emitterArgs = Array("--target-dir", "generated/simd")

  emitVerilog(new SIMD(SIMDConstant.laneLen), emitterArgs)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package simd | ||
|
||
import chisel3._ | ||
import org.scalatest.flatspec.AnyFlatSpec | ||
import chiseltest._ | ||
import scala.math.BigInt | ||
import org.scalatest.matchers.should.Matchers | ||
import org.scalatest.Tag | ||
|
||
// post-processing SIMD manually-generated random data test
// TODO: automate the generation and checking of a bunch of random (parallel) test data
class SIMDManualTest
    extends AnyFlatSpec
    with ChiselScalatestTester
    with Matchers {
  "DUT" should "pass" in {
    test(new SIMD)
      .withAnnotations(
        Seq(WriteVcdAnnotation)
      ) { dut =>
        // Sends one configuration and one packed input word to the SIMD, then
        // prints the two lowest output lanes for manual inspection.
        // NOTE(review): this sequence never drives io.data.input_i.valid or
        // io.data.out_o.ready, and nothing is asserted — confirm whether the
        // printed values are meaningful without the handshake.
        def verify(
            input: BigInt,
            input_zp: Byte,
            output_zp: Byte,
            multiplier: Int,
            shift: Byte,
            max_int: Byte,
            min_int: Byte
        ): Unit = {
          dut.clock.step()

          // giving the configuration
          dut.io.ctrl.bits.input_zp_i.poke(input_zp)
          dut.io.ctrl.bits.output_zp_i.poke(output_zp)
          dut.io.ctrl.bits.multiplier_i.poke(multiplier)
          dut.io.ctrl.bits.shift_i.poke(shift)
          dut.io.ctrl.bits.max_int_i.poke(max_int)
          dut.io.ctrl.bits.min_int_i.poke(min_int)
          dut.io.ctrl.bits.double_round_i.poke(1)
          dut.io.ctrl.valid.poke(1.B)
          dut.clock.step()
          dut.io.ctrl.valid.poke(0)

          // giving input data
          dut.clock.step()
          dut.io.data.input_i.bits.poke(input)
          dut.clock.step()

          // manually check SIMD output
          // assumes 8-bit output lanes — TODO confirm against SIMDConstant.outputType
          val outBits = dut.io.data.out_o.bits.peekInt()
          val laneMask = (BigInt(1) << 8) - 1
          // lowest 8 bits of the output bus
          println(outBits & laneMask)
          // next 8 bits, shifted down so the lane value itself is printed
          // rather than the value scaled by 256
          println((outBits >> 8) & laneMask)

          dut.clock.step()
        }

        // Renders intValue as lowercase hex, left-padded with '0' to `width`
        // characters, so several values can be concatenated into one BigInt.
        def int2hex(width: Int, intValue: Int) = {
          val paddingChar = '0'
          f"$intValue%x".reverse.padTo(width, paddingChar).reverse
        }

        // random data test: the same 32-bit value packed twice, in the two
        // lowest 32-bit fields of the input word
        val firstInput =
          BigInt(int2hex(8, 267082502) + int2hex(8, 267082502), 16)
        verify(firstInput, -59, -118, 8192, 34, 127, -128)

        val secondInput =
          BigInt(int2hex(8, 71671912) + int2hex(8, 71671912), 16)
        verify(secondInput, -23, -126, 65536, 37, 127, -128)

      }
  }
}