Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First FPGA with TPAR/AS merging #338

Merged
merged 23 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Define base directory of firmware-hls clone.
FIRMWARE=$(shell git rev-parse --show-toplevel)
# MODIFY THESE LINES WHEN COPYING AND PASTING THIS MAKEFILE
TOP_FUNCS=$(FIRMWARE)/TopFunctions/CombinedConfig
TOP_FUNCS=$(FIRMWARE)/TopFunctions/CombinedConfig_FPGA2
MODULES=InputRouterTop_IR_DTC_PS10G_1_A InputRouterTop_IR_DTC_PS10G_1_B InputRouterTop_IR_DTC_PS10G_2_A InputRouterTop_IR_DTC_PS10G_2_B InputRouterTop_IR_DTC_PS10G_3_A InputRouterTop_IR_DTC_PS10G_3_B InputRouterTop_IR_DTC_PS10G_4_A InputRouterTop_IR_DTC_PS10G_4_B InputRouterTop_IR_DTC_PS_1_A InputRouterTop_IR_DTC_PS_1_B InputRouterTop_IR_DTC_PS_2_A InputRouterTop_IR_DTC_PS_2_B InputRouterTop_IR_DTC_negPS10G_1_A InputRouterTop_IR_DTC_negPS10G_1_B InputRouterTop_IR_DTC_negPS10G_2_A InputRouterTop_IR_DTC_negPS10G_2_B InputRouterTop_IR_DTC_negPS10G_3_A InputRouterTop_IR_DTC_negPS10G_3_B InputRouterTop_IR_DTC_negPS10G_4_A InputRouterTop_IR_DTC_negPS10G_4_B InputRouterTop_IR_DTC_negPS_1_A InputRouterTop_IR_DTC_negPS_1_B InputRouterTop_IR_DTC_negPS_2_A InputRouterTop_IR_DTC_negPS_2_B InputRouterTop_IR_DTC_2S_1_A InputRouterTop_IR_DTC_2S_1_B InputRouterTop_IR_DTC_2S_2_A InputRouterTop_IR_DTC_2S_2_B InputRouterTop_IR_DTC_2S_3_A InputRouterTop_IR_DTC_2S_3_B InputRouterTop_IR_DTC_2S_4_A InputRouterTop_IR_DTC_2S_4_B InputRouterTop_IR_DTC_2S_5_A InputRouterTop_IR_DTC_2S_5_B InputRouterTop_IR_DTC_2S_6_A InputRouterTop_IR_DTC_2S_6_B InputRouterTop_IR_DTC_neg2S_1_A InputRouterTop_IR_DTC_neg2S_1_B InputRouterTop_IR_DTC_neg2S_2_A InputRouterTop_IR_DTC_neg2S_2_B InputRouterTop_IR_DTC_neg2S_3_A InputRouterTop_IR_DTC_neg2S_3_B InputRouterTop_IR_DTC_neg2S_4_A InputRouterTop_IR_DTC_neg2S_4_B InputRouterTop_IR_DTC_neg2S_5_A InputRouterTop_IR_DTC_neg2S_5_B InputRouterTop_IR_DTC_neg2S_6_A InputRouterTop_IR_DTC_neg2S_6_B VMRouterCMTop_L1PHIA VMRouterCMTop_L1PHIB VMRouterCMTop_L1PHIC VMRouterCMTop_L1PHID VMRouterCMTop_L1PHIE VMRouterCMTop_L1PHIF VMRouterCMTop_L1PHIG VMRouterCMTop_L1PHIH VMRouterCMTop_L2PHIA VMRouterCMTop_L2PHIB VMRouterCMTop_L2PHIC VMRouterCMTop_L2PHID VMRouterCMTop_L3PHIA VMRouterCMTop_L3PHIB VMRouterCMTop_L3PHIC VMRouterCMTop_L3PHID VMRouterCMTop_L4PHIA VMRouterCMTop_L4PHIB VMRouterCMTop_L4PHIC VMRouterCMTop_L4PHID VMRouterCMTop_L5PHIA VMRouterCMTop_L5PHIB 
VMRouterCMTop_L5PHIC VMRouterCMTop_L5PHID VMRouterCMTop_L6PHIA VMRouterCMTop_L6PHIB VMRouterCMTop_L6PHIC VMRouterCMTop_L6PHID VMRouterCMTop_D1PHIA VMRouterCMTop_D1PHIB VMRouterCMTop_D1PHIC VMRouterCMTop_D1PHID VMRouterCMTop_D2PHIA VMRouterCMTop_D2PHIB VMRouterCMTop_D2PHIC VMRouterCMTop_D2PHID VMRouterCMTop_D3PHIA VMRouterCMTop_D3PHIB VMRouterCMTop_D3PHIC VMRouterCMTop_D3PHID VMRouterCMTop_D4PHIA VMRouterCMTop_D4PHIB VMRouterCMTop_D4PHIC VMRouterCMTop_D4PHID VMRouterCMTop_D5PHIA VMRouterCMTop_D5PHIB VMRouterCMTop_D5PHIC VMRouterCMTop_D5PHID TrackletProcessor_L1L2A TrackletProcessor_L1L2B TrackletProcessor_L1L2C TrackletProcessor_L1L2D TrackletProcessor_L1L2E TrackletProcessor_L1L2F TrackletProcessor_L1L2G TrackletProcessor_L1L2H TrackletProcessor_L1L2I TrackletProcessor_L1L2J TrackletProcessor_L1L2K TrackletProcessor_L1L2L TrackletProcessor_L2L3A TrackletProcessor_L2L3B TrackletProcessor_L2L3C TrackletProcessor_L2L3D TrackletProcessor_L3L4A TrackletProcessor_L3L4B TrackletProcessor_L3L4C TrackletProcessor_L3L4D TrackletProcessor_L5L6A TrackletProcessor_L5L6B TrackletProcessor_L5L6C TrackletProcessor_L5L6D TrackletProcessor_D1D2A TrackletProcessor_D1D2B TrackletProcessor_D1D2C TrackletProcessor_D1D2D TrackletProcessor_D3D4A TrackletProcessor_D3D4B TrackletProcessor_D3D4C TrackletProcessor_D3D4D TrackletProcessor_L1D1A TrackletProcessor_L1D1B TrackletProcessor_L1D1C TrackletProcessor_L1D1D TrackletProcessor_L1D1E TrackletProcessor_L1D1F TrackletProcessor_L1D1G TrackletProcessor_L1D1H TrackletProcessor_L2D1A TrackletProcessor_L2D1B TrackletProcessor_L2D1C TrackletProcessor_L2D1D

# Include rules for making the project.
Expand Down
137 changes: 137 additions & 0 deletions IntegrationTests/common/hdl/tf_merge_streamer.vhd
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
--===========================================================================
--! @file
--! @brief Module which reads and streams out the contents of the memories
--! at the end of the first half of the TF algo.
--! @author Jason Fan ([email protected])
--! @date 2024-02-29
--! @version v.1.0
--===========================================================================

--! Standard library
library ieee;
--! Standard package
use ieee.std_logic_1164.all;
--! Signed/unsigned calculations
use ieee.numeric_std.all;
--! Standard functions
library std;
--! Standard TextIO functions
use std.textio.all;

--! Xilinx library
library unisim;
--! Xilinx package
use unisim.vcomponents.all;
use work.tf_pkg.all;

-- Interface of the merge streamer: reads up to four paged input memories and
-- emits their words on a single output stream (merged_dout), one word per
-- clock, each word tagged with the index of the memory it came from.
entity tf_merge_streamer is
generic (
-- Width in bits of each input data word (din0..din3).
RAM_WIDTH : natural := 72;
-- Number of pages per memory; sizes the nent* arrays, and the page that is
-- read is selected as bx_in mod NUM_PAGES.
NUM_PAGES : natural := 8;
-- Total number of words per memory; its CLOGB2 sets the width of each
-- per-memory slice of addr_arr.
RAM_DEPTH : natural := NUM_PAGES * PAGE_LENGTH;
-- Number of input memories actually scanned (the ports always provide 4).
NUM_INPUTS : natural := 4;
-- Number of bits in merged_dout used to encode the source memory index.
NUM_EXTRA_BITS: natural := 2;
-- NOTE(review): not referenced by the RTL architecture in this file; the
-- address width is derived from CLOGB2(RAM_DEPTH) instead.
ADDR_WIDTH : natural := 7
);
port (
-- Bunch-crossing counter; a change of value restarts the per-memory read
-- counters and selects the page to read.
bx_in : in std_logic_vector(2 downto 0 );
-- NOTE(review): not read by the RTL architecture in this file.
rst: in std_logic;
clk : in std_logic;
--output read enable to tf_mem modules
-- NOTE(review): never assigned by the RTL architecture in this file, so it
-- keeps its default value; confirm how it is connected at the top level.
enb_arr: out std_logic_vector(NUM_INPUTS-1 downto 0);
-- bx_in registered on the rising edge of clk.
bx_out : out std_logic_vector(2 downto 0);
--output merged stream, includes input word, up to 2 bits that encode the
--original module, and a valid bit (from LSB to MSB)
merged_dout : out std_logic_vector(RAM_WIDTH+NUM_EXTRA_BITS downto 0);
--input data,nent and addresses are best suited for unconstrained arrays
--but this is not supported in vivado 2019
--module always accepts 4 input memories, but will not use all of them
-- Read data from input memories 0..3.
din0: in std_logic_vector(RAM_WIDTH-1 downto 0);
din1: in std_logic_vector(RAM_WIDTH-1 downto 0);
din2: in std_logic_vector(RAM_WIDTH-1 downto 0);
din3: in std_logic_vector(RAM_WIDTH-1 downto 0);
-- Per-page entry counts of input memories 0..3.
nent0: in t_arr_7b(0 to NUM_PAGES-1);
nent1: in t_arr_7b(0 to NUM_PAGES-1);
nent2: in t_arr_7b(0 to NUM_PAGES-1);
nent3: in t_arr_7b(0 to NUM_PAGES-1);
-- Read addresses for all memories, concatenated: memory i owns bits
-- (i+1)*CLOGB2(RAM_DEPTH)-1 downto i*CLOGB2(RAM_DEPTH).
addr_arr: out std_logic_vector(NUM_INPUTS*CLOGB2(RAM_DEPTH)-1 downto 0)
) ;
end entity tf_merge_streamer;

architecture RTL of tf_merge_streamer is

  -- The port list always carries four memories; NUM_INPUTS of them are scanned.
  constant MAX_INPUTS  : integer := 4;
  -- Number of clocks between issuing a read address and sampling the matching
  -- din word. Both the valid flag and the source tag are delayed by this much.
  constant pipe_stages : integer := 4;

  type mem_count_arr is array(NUM_INPUTS-1 downto 0) of integer;
  type toread_arr is array(pipe_stages-1 downto 0) of integer;

  -- nent and din are repackaged from the individual ports into arrays so that
  -- they can be indexed by memory number.
  type nent_array is array(MAX_INPUTS-1 downto 0) of t_arr_7b(0 to NUM_PAGES-1);
  type din_array is array(MAX_INPUTS-1 downto 0) of std_logic_vector(RAM_WIDTH-1 downto 0);

  -- valid(j) = '1' when a read was issued j+1 clocks ago.
  signal valid    : std_logic_vector(pipe_stages-1 downto 0) := (others => '0');
  -- Registered copy of the per-memory "still has unread entries" mask. The
  -- arbitration below uses the unregistered variable, not this signal.
  signal readmask : std_logic_vector(NUM_INPUTS-1 downto 0) := (others => '0');
  -- toread(j) = index of the memory addressed j+1 clocks ago. This is a signal
  -- (not a variable) so that it shifts one stage per clock exactly like valid.
  signal toread   : toread_arr := (others => 0);

begin

  -- NOTE(review): rst is not used and enb_arr is never driven by this
  -- architecture; confirm the intended behaviour against the top level.
  process(clk)
    variable nent_arr     : nent_array;
    variable din_arr      : din_array;
    variable bx_last      : integer := 0;
    -- Number of words already requested from each memory for the current bx.
    variable mem_count    : mem_count_arr := (others => 0);
    variable current_page : natural := 0;
    -- True when bx_in differs from its value on the previous clock.
    variable bx_change    : boolean := false;
    -- Mask computed from the current counters in this same clock.
    variable readmask_v   : std_logic_vector(NUM_INPUTS-1 downto 0) := (others => '0');
    -- Round-robin pointer: memory selected by the most recent read.
    variable rr_ptr       : integer := 0;
    -- Memory whose data is sampled from din on this clock.
    variable sel          : integer := 0;
  begin
    if rising_edge(clk) then
      -- Repackage nent and din as arrays (positional: index 3 first).
      nent_arr := (nent3, nent2, nent1, nent0);
      din_arr  := (din3, din2, din1, din0);

      -- A change in bx restarts the read counters for the new page.
      bx_change := (bx_last /= to_integer(unsigned(bx_in)));
      if bx_change then
        mem_count := (others => 0);
      end if;
      current_page := to_integer(unsigned(bx_in)) mod NUM_PAGES;

      -- Flag every memory whose read counter is still below its entry count.
      -- A variable is used so the arbitration below sees the mask that matches
      -- the counters of this clock; reading the signal would return last
      -- clock's mask and allow one read past the end of the last active memory.
      for i in 0 to NUM_INPUTS-1 loop
        if mem_count(i) < to_integer(unsigned(nent_arr(i)(current_page))) then
          readmask_v(i) := '1';
        else
          readmask_v(i) := '0';
        end if;
      end loop;
      readmask <= readmask_v;

      if to_integer(unsigned(readmask_v)) = 0 then
        -- Nothing left to read for this bx.
        valid(0) <= '0';
      else
        -- Round robin: pick the next flagged memory after the last one served.
        for j in 0 to NUM_INPUTS-1 loop
          if readmask_v((j + rr_ptr + 1) mod NUM_INPUTS) = '1' then
            rr_ptr := (j + rr_ptr + 1) mod NUM_INPUTS;
            exit;
          end if;
        end loop;
        -- Only the selected memory's address slice is updated; the other
        -- slices keep their previous value.
        addr_arr(((rr_ptr+1)*clogb2(RAM_DEPTH))-1 downto rr_ptr*clogb2(RAM_DEPTH))
          <= std_logic_vector(to_unsigned(current_page*page_length + mem_count(rr_ptr), clogb2(RAM_DEPTH)));
        valid(0) <= '1';
        mem_count(rr_ptr) := mem_count(rr_ptr) + 1;
      end if;
      -- Enter the selected memory into the tag pipeline alongside valid(0).
      toread(0) <= rr_ptr;

      -- Output stage: valid and toread have travelled the same number of
      -- stages, so the tag and the din select refer to the same read.
      sel := toread(pipe_stages-1);
      if valid(pipe_stages-1) = '1' then
        if (NUM_EXTRA_BITS > 0) then
          merged_dout <= '1' & std_logic_vector(to_unsigned(sel, NUM_EXTRA_BITS)) & din_arr(sel);
        else
          -- No tag bits: same pipeline stage as the tagged branch.
          merged_dout <= '1' & din_arr(sel);
        end if;
      else
        merged_dout <= (others => '0');
      end if;

      bx_last := to_integer(unsigned(bx_in));
      bx_out  <= bx_in;

      -- Advance both pipelines by one stage. These are signal assignments, so
      -- every stage takes the value its predecessor held before this clock.
      for j in 0 to pipe_stages-2 loop
        toread(j+1) <= toread(j);
        valid(j+1)  <= valid(j);
      end loop;
    end if;
  end process;
end RTL;
5 changes: 2 additions & 3 deletions TestBenches/TrackletProcessor_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,7 @@ std::cout<<module_name[MODULE_];
// print the input files loaded
std::cout << "Loaded the input files:\n";
for (unsigned i = 0; i < nInnerStubMems; i++)
//(i+2)%nInnerStubMems has the innerstubs match the order in the emulation
std::cout << "\t" << tb.fileNames(innerStubPattern).at((i+2)%nInnerStubMems) << "\n";
std::cout << "\t" << tb.fileNames(innerStubPattern).at(i) << "\n";
for (unsigned i = 0; i < nOuterStubMems; i++)
std::cout << "\t" << tb.fileNames(outerStubPattern).at(i) << "\n";
for (unsigned i = 0; i < nOuterVMStubMems; i++)
Expand All @@ -142,7 +141,7 @@ std::cout<<module_name[MODULE_];

// read event and write to memories
for (unsigned i = 0; i < nInnerStubMems; i++)
writeMemFromFile<AllStubInnerMemory<InnerStubType> >(innerStubs[i], fin_innerStubs.at((i+2)%nInnerStubMems), ievt);
writeMemFromFile<AllStubInnerMemory<InnerStubType> >(innerStubs[i], fin_innerStubs.at(i), ievt);
for (unsigned i = 0; i < nOuterStubMems; i++)
writeMemFromFile<AllStubMemory<OuterStubType> >(outerStubs[i], fin_outerStubs.at(i), ievt);
for (unsigned i = 0; i < nOuterVMStubMems; i++)
Expand Down
9 changes: 4 additions & 5 deletions TrackletAlgorithm/TrackletProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1145,11 +1145,10 @@ teunits[k].idle_;

//Extract the current stub - check if valid. Calculate next stub. Check if valid
istub__ = tebuffer.getIStub();
bool validstub = istub__ < innerStubs[imem].getEntries(bx);

bool validstub = istub__ < innerStubs[(imem+2)%NASMemInner].getEntries(bx);

ap_uint<kNBits_MemAddr> istubnext = istub__+1;
bool validstubnext=istubnext<innerStubs[imem].getEntries(bx);
bool validstubnext=istubnext<innerStubs[(imem+2)%NASMemInner].getEntries(bx);

//Calculate good stub - true if:
//validmem is true - meaning that we have not exhausted all stub memories
Expand All @@ -1159,10 +1158,10 @@ teunits[k].idle_;
//Update istub if goodstub
tebuffer.getIStub()=goodstub__?(validstubnext?istubnext:ap_uint<kNBits_MemAddr>(0)):istub__;
//Update imem if the next stub is not valid
tebuffer.getMem()=((goodstub__&&(!validstubnext))||(innerStubs[imem].getEntries(bx) == 0&&validmem))?imemnext:imem;
tebuffer.getMem()=((goodstub__&&(!validstubnext))||(innerStubs[(imem+2)%NASMemInner].getEntries(bx) == 0&&validmem))?imemnext:imem;

//Read stub from memory - BRAM with latency of one or two clks
stub__ = AllStubInner<innerASType>(innerStubs[imem].read_mem(bx,istub__).raw());
stub__ = AllStubInner<innerASType>(innerStubs[(imem+2)%NASMemInner].read_mem(bx,istub__).raw());

//Update TEUnit data for next loop

Expand Down
19 changes: 11 additions & 8 deletions emData/download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ cp ../LUTsCM2/memorymodules.dat reducedcm2_memorymodules.dat
cp ../LUTsCMBarrel/wires.dat cmbarrel_wires.dat
cp ../LUTsCMBarrel/processingmodules.dat cmbarrel_processingmodules.dat
cp ../LUTsCMBarrel/memorymodules.dat cmbarrel_memorymodules.dat
cp ../LUTsSplit/wires.dat fpga1_wires.dat
cp ../LUTsSplit/processingmodules.dat fpga1_processingmodules.dat
cp ../LUTsSplit/memorymodules.dat fpga1_memorymodules.dat
# grep, awk, and sed should be fixed in CMSSW - no we can use the config from
# CMSSW instead of a hand made configuration. But it still needs tweaking...
grep -v vmstuboutPHI ../LUTsSplit/wires.dat | grep -v TP_ | grep -v IR_ > fpga2_wires.dat
Expand Down Expand Up @@ -197,6 +200,7 @@ echo "TrackletParameters: MPAR_L2D1ABCDin [73]" >> fpga2_memorymodules.dat

grep -v TP_ ../LUTsSplit/processingmodules.dat | grep -v VMR_ | grep -v IR_ > fpga2_processingmodules.dat
sed -i 's/VMStubMERouter/VMSMERouter/g' fpga2_processingmodules.dat
sed -i 's/VMStubMERouter/VMSMERouter/g' fpga1_processingmodules.dat

./makeReducedConfig.py --no-graph -t "TP" -s "C" -o "reducedcm_"
cp -fv ../LUTsCM2/wires.dat ../LUTsCM2/memorymodules.dat ../LUTsCM2/processingmodules.dat ./
Expand Down Expand Up @@ -226,16 +230,15 @@ mv -fv memUtil_pkg.vhd SectorProcessor.vhd SectorProcessorFull.vhd ../../Integra
mv -fv tb_tf_top.vhd ../../IntegrationTests/CombinedBarrelConfig/tb/
### Combined IRtoTP
echo "CombinedIRtoTP"
cp -fv ../LUTsCM/wires.dat ../LUTsCM/memorymodules.dat ../LUTsCM/processingmodules.dat ./
./generator_hdl.py ../../ --no_graph --mut IR -u 0 -d 2 -w wires.dat -p processingmodules.dat -m memorymodules.dat -de 1 -sp
./generator_hdl.py ../../ --no_graph --mut IR -u 0 -d 2 -w wires.dat -p processingmodules.dat -m memorymodules.dat -de 1 -x -sp
mkdir -p ../../IntegrationTests/CombinedConfig/IRtoTP/{hdl,tb}
mv -fv memUtil_pkg.vhd SectorProcessor.vhd SectorProcessorFull.vhd ../../IntegrationTests/CombinedConfig/IRtoTP/hdl/
mv -fv tb_tf_top.vhd ../../IntegrationTests/CombinedConfig/IRtoTP/tb/
./generator_hdl.py ../../ --no_graph --mut IR -u 0 -d 2 -w fpga1_wires.dat -p fpga1_processingmodules.dat -m fpga1_memorymodules.dat -de 1 -sp 1
./generator_hdl.py ../../ --no_graph --mut IR -u 0 -d 2 -w fpga1_wires.dat -p fpga1_processingmodules.dat -m fpga1_memorymodules.dat -de 1 -x -sp 1
mkdir -p ../../IntegrationTests/CombinedConfig_FPGA1/{hdl,tb}
mv -fv memUtil_pkg.vhd SectorProcessor.vhd SectorProcessorFull.vhd ../../IntegrationTests/CombinedConfig_FPGA1/hdl/
mv -fv tb_tf_top.vhd ../../IntegrationTests/CombinedConfig_FPGA1/tb/
### Combined PC/VMSMER to TB
echo "CM FPGA2"
./generator_hdl.py ../../ --no_graph --split --fpga2 --mut PC -u 0 -d 2 -w fpga2_wires.dat -p fpga2_processingmodules.dat -m fpga2_memorymodules.dat -de 1
./generator_hdl.py ../../ --no_graph --split --fpga2 --mut PC -u 0 -d 2 -w fpga2_wires.dat -p fpga2_processingmodules.dat -m fpga2_memorymodules.dat -de 1 -x
./generator_hdl.py ../../ --no_graph --sp 2 --mut PC -u 0 -d 2 -w fpga2_wires.dat -p fpga2_processingmodules.dat -m fpga2_memorymodules.dat -de 1
./generator_hdl.py ../../ --no_graph --sp 2 --mut PC -u 0 -d 2 -w fpga2_wires.dat -p fpga2_processingmodules.dat -m fpga2_memorymodules.dat -de 1 -x
mkdir -p ../../IntegrationTests/CombinedConfig_FPGA2/{hdl,tb}
mv -fv memUtil_pkg.vhd SectorProcessor.vhd SectorProcessorFull.vhd ../../IntegrationTests/CombinedConfig_FPGA2/hdl/
mv -fv tb_tf_top.vhd ../../IntegrationTests/CombinedConfig_FPGA2/tb/
Expand Down