diff --git a/arches/big_core.yaml b/arches/big_core.yaml index 37566b97..bc6d9286 100644 --- a/arches/big_core.yaml +++ b/arches/big_core.yaml @@ -25,7 +25,7 @@ top.cpu.core0.extension.core_extensions: pipelines: [ ["sys"], # exe0 - ["int", "div"], # exe1 + ["int", "div", "vset"], # exe1 ["int", "mul"], # exe2 ["int", "mul", "i2f", "cmov"], # exe3 ["int"], # exe4 @@ -57,29 +57,32 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] diff --git a/arches/isa_json/gen_uarch_rv64v_json.py b/arches/isa_json/gen_uarch_rv64v_json.py index 5c258be0..6616ea5a 100755 --- a/arches/isa_json/gen_uarch_rv64v_json.py +++ b/arches/isa_json/gen_uarch_rv64v_json.py @@ -13,7 +13,23 @@ "vsetivli" : {"pipe" : "vset", "latency" : 1}, # TODO: Vector Loads and Stores: Vector Unit-Stride Instructions + "vse8.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vse16.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vse32.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vse64.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vle8.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vle16.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vle32.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vle64.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, # TODO: Vector Loads and Stores: Vector Strided Instructions + "vsse8.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vsse16.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vsse32.v" : {"pipe" : "vlsu", "uop_gen" 
: "ARITH", "latency" : 1}, + "vsse64.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vlse8.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vlse16.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vlse32.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, + "vlse64.v" : {"pipe" : "vlsu", "uop_gen" : "ARITH", "latency" : 1}, # TODO: Vector Loads and Stores: Vector Indexed Instructions # TODO: Vector Loads and Stores: Unit-stride Fault-Only-First Loads # TODO: Vector Loads and Stores: Vector Load/Store Segment Instructions diff --git a/arches/isa_json/olympia_uarch_rv64v.json b/arches/isa_json/olympia_uarch_rv64v.json index 118e5b97..fe31c7c5 100644 --- a/arches/isa_json/olympia_uarch_rv64v.json +++ b/arches/isa_json/olympia_uarch_rv64v.json @@ -799,9 +799,9 @@ }, { "mnemonic": "vle16.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vle16ff.v", @@ -811,9 +811,9 @@ }, { "mnemonic": "vle32.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vle32ff.v", @@ -823,9 +823,9 @@ }, { "mnemonic": "vle64.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vle64ff.v", @@ -835,9 +835,9 @@ }, { "mnemonic": "vle8.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vle8ff.v", @@ -877,27 +877,27 @@ }, { "mnemonic": "vlse16.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vlse32.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vlse64.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vlse8.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vluxei16.v", @@ -1693,27 +1693,27 @@ }, { "mnemonic": "vse16.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vse32.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vse64.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vse8.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsetivli", @@ -1885,27 +1885,27 @@ }, { "mnemonic": "vsse16.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsse32.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsse64.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsse8.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vlsu", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vssra.vi", diff --git a/arches/medium_core.yaml b/arches/medium_core.yaml index 661f8e47..8e4ff36e 100644 --- a/arches/medium_core.yaml +++ b/arches/medium_core.yaml @@ -50,26 +50,29 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: 
| - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] diff --git a/arches/small_core.yaml b/arches/small_core.yaml index 67cb94db..b5e465d5 100644 --- a/arches/small_core.yaml +++ b/arches/small_core.yaml @@ -42,23 +42,26 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 32743498..6ad28733 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -18,6 +18,7 @@ add_library(core IssueQueue.cpp ROB.cpp LSU.cpp + VLSU.cpp MMU.cpp DCache.cpp MavisUnit.cpp diff --git a/core/CPUFactories.hpp b/core/CPUFactories.hpp index 1a875ea9..3c602f8a 100644 --- a/core/CPUFactories.hpp +++ b/core/CPUFactories.hpp @@ -12,6 +12,7 @@ #include "Dispatch.hpp" #include 
"Execute.hpp" #include "LSU.hpp" +#include "VLSU.hpp" #include "MMU.hpp" #include "SimpleTLB.hpp" #include "BIU.hpp" @@ -77,6 +78,10 @@ namespace olympia{ sparta::ResourceFactory lsu_rf; + //! \brief Resource Factory to build a LSU Unit + sparta::ResourceFactory vlsu_rf; + //! \brief Resouce Factory to build a L2Cache Unit sparta::ResourceFactory l2cache_rf; diff --git a/core/CPUTopology.cpp b/core/CPUTopology.cpp index d8fdb0a1..16807d38 100644 --- a/core/CPUTopology.cpp +++ b/core/CPUTopology.cpp @@ -108,6 +108,14 @@ olympia::CoreTopologySimple::CoreTopologySimple(){ sparta::TreeNode::GROUP_IDX_NONE, &factories->lsu_rf }, + { + "vlsu", + "cpu.core*", + "Vector Load-Store Unit", + sparta::TreeNode::GROUP_NAME_NONE, + sparta::TreeNode::GROUP_IDX_NONE, + &factories->vlsu_rf + }, { "l2cache", "cpu.core*", @@ -192,6 +200,14 @@ olympia::CoreTopologySimple::CoreTopologySimple(){ "cpu.core*.dispatch.ports.in_lsu_credits", "cpu.core*.lsu.ports.out_lsu_credits" }, + { + "cpu.core*.dispatch.ports.out_vlsu_write", + "cpu.core*.vlsu.ports.in_vlsu_insts" + }, + { + "cpu.core*.dispatch.ports.in_vlsu_credits", + "cpu.core*.vlsu.ports.out_vlsu_credits" + }, { "cpu.core*.dispatch.ports.out_reorder_buffer_write", "cpu.core*.rob.ports.in_reorder_buffer_write" @@ -216,6 +232,22 @@ olympia::CoreTopologySimple::CoreTopologySimple(){ "cpu.core*.dcache.ports.out_lsu_free_req", "cpu.core*.lsu.ports.in_cache_free_req" }, + { + "cpu.core*.vlsu.ports.out_cache_lookup_req", + "cpu.core*.dcache.ports.in_lsu_lookup_req" + }, + { + "cpu.core*.dcache.ports.out_vlsu_lookup_ack", + "cpu.core*.vlsu.ports.in_cache_lookup_ack" + }, + { + "cpu.core*.dcache.ports.out_vlsu_lookup_req", + "cpu.core*.vlsu.ports.in_cache_lookup_req" + }, + { + "cpu.core*.dcache.ports.out_vlsu_free_req", + "cpu.core*.vlsu.ports.in_cache_free_req" + }, { "cpu.core*.dcache.ports.out_l2cache_req", "cpu.core*.l2cache.ports.in_dcache_l2cache_req" @@ -256,6 +288,22 @@ olympia::CoreTopologySimple::CoreTopologySimple(){ "cpu.core*.mmu.ports.out_lsu_free_req", "cpu.core*.lsu.ports.in_mmu_free_req" }, + { + "cpu.core*.vlsu.ports.out_mmu_lookup_req", + "cpu.core*.mmu.ports.in_lsu_lookup_req" + }, + { + "cpu.core*.mmu.ports.out_vlsu_lookup_ack", + "cpu.core*.vlsu.ports.in_mmu_lookup_ack" + }, + { + "cpu.core*.mmu.ports.out_vlsu_lookup_req", + "cpu.core*.vlsu.ports.in_mmu_lookup_req" + }, + { + "cpu.core*.mmu.ports.out_vlsu_free_req", + "cpu.core*.vlsu.ports.in_mmu_free_req" + }, { "cpu.core*.biu.ports.out_mss_req_sync", "cpu.core*.mss.ports.in_mss_req_sync" @@ -272,6 +320,14 @@ olympia::CoreTopologySimple::CoreTopologySimple(){ "cpu.core*.rob.ports.out_rob_retire_ack", "cpu.core*.lsu.ports.in_rob_retire_ack" }, + { + "cpu.core*.rob.ports.out_rob_retire_ack", + "cpu.core*.vlsu.ports.in_rob_retire_ack" + }, + { + "cpu.core*.rob.ports.out_rob_retire_ack_vlsu", + "cpu.core*.vlsu.ports.in_rob_retire_ack" + }, { "cpu.core*.rob.ports.out_rob_retire_ack_rename", "cpu.core*.rename.ports.in_rename_retire_ack" @@ -300,6 +356,10 @@ olympia::CoreTopologySimple::CoreTopologySimple(){ "cpu.core*.flushmanager.ports.out_flush_upper", "cpu.core*.lsu.ports.in_reorder_flush" }, + { + "cpu.core*.flushmanager.ports.out_flush_upper", + "cpu.core*.vlsu.ports.in_reorder_flush" + }, { "cpu.core*.flushmanager.ports.out_flush_upper", "cpu.core*.fetch.ports.in_fetch_flush_redirect" diff --git a/core/DCache.cpp b/core/DCache.cpp index 11b9fa40..c0a8a113 100644 --- a/core/DCache.cpp +++ b/core/DCache.cpp @@ -129,7 +129,14 @@ namespace olympia if (hit) { 
mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::HIT); - out_lsu_lookup_ack_.send(mem_access_info_ptr); + if(mem_access_info_ptr->getInstPtr()->isVector()) + { + out_vlsu_lookup_ack_.send(mem_access_info_ptr); + } + else + { + out_lsu_lookup_ack_.send(mem_access_info_ptr); + } return; } @@ -140,7 +147,14 @@ namespace olympia { // Should be Nack but miss should work for now mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::MISS); - out_lsu_lookup_ack_.send(mem_access_info_ptr); + if(mem_access_info_ptr->getInstPtr()->isVector()) + { + out_vlsu_lookup_ack_.send(mem_access_info_ptr); + } + else + { + out_lsu_lookup_ack_.send(mem_access_info_ptr); + } return; } @@ -179,7 +193,14 @@ namespace olympia (*mshr_it)->setMemRequest(mem_access_info_ptr); mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::MISS); } - out_lsu_lookup_ack_.send(mem_access_info_ptr); + if(mem_access_info_ptr->getInstPtr()->isVector()) + { + out_vlsu_lookup_ack_.send(mem_access_info_ptr); + } + else + { + out_lsu_lookup_ack_.send(mem_access_info_ptr); + } } uint64_t DCache::getBlockAddr(const MemoryAccessInfoPtr & mem_access_info_ptr) const @@ -218,7 +239,14 @@ namespace olympia uev_mshr_request_.schedule(sparta::Clock::Cycle(1)); } } - out_lsu_lookup_ack_.send(mem_access_info_ptr); + if(mem_access_info_ptr->getInstPtr()->isVector()) + { + out_vlsu_lookup_ack_.send(mem_access_info_ptr); + } + else + { + out_lsu_lookup_ack_.send(mem_access_info_ptr); + } } void DCache::mshrRequest_() @@ -259,7 +287,14 @@ namespace olympia if (mshr_it.isValid()) { MemoryAccessInfoPtr dependant_load_inst = (*mshr_it)->getMemRequest(); - out_lsu_lookup_ack_.send(dependant_load_inst); + if(dependant_load_inst->getInstPtr()->isVector()) + { + out_vlsu_lookup_ack_.send(dependant_load_inst); + } + else + { + out_lsu_lookup_ack_.send(dependant_load_inst); + } ILOG("Removing mshr entry for " << mem_access_info_ptr); mshr_file_.erase(mem_access_info_ptr->getMSHRInfoIterator()); @@ -272,7 +307,14 @@ namespace olympia void DCache::receiveMemReqFromLSU_(const MemoryAccessInfoPtr & memory_access_info_ptr) { ILOG("Received memory access request from LSU " << memory_access_info_ptr); - out_lsu_lookup_ack_.send(memory_access_info_ptr); + if(memory_access_info_ptr->getInstPtr()->isVector()) + { + out_vlsu_lookup_ack_.send(memory_access_info_ptr); + } + else + { + out_lsu_lookup_ack_.send(memory_access_info_ptr); + } in_l2_cache_resp_receive_event_.schedule(); lsu_mem_access_info_ = memory_access_info_ptr; } diff --git a/core/DCache.hpp b/core/DCache.hpp index 36d0349d..1999faca 100644 --- a/core/DCache.hpp +++ b/core/DCache.hpp @@ -118,6 +118,14 @@ namespace olympia sparta::DataOutPort out_l2cache_req_{&unit_port_set_, "out_l2cache_req", 0}; + sparta::SignalOutPort out_vlsu_free_req_{&unit_port_set_, "out_vlsu_free_req", 0}; + + sparta::DataOutPort out_vlsu_lookup_ack_{&unit_port_set_, + "out_vlsu_lookup_ack", 0}; + + sparta::DataOutPort out_vlsu_lookup_req_{&unit_port_set_, + "out_vlsu_lookup_req", 1}; + //////////////////////////////////////////////////////////////////////////////// // Events //////////////////////////////////////////////////////////////////////////////// diff --git a/core/Decode.cpp b/core/Decode.cpp index 05778755..3ac5db2a 100644 --- a/core/Decode.cpp +++ b/core/Decode.cpp @@ -65,7 +65,7 @@ namespace olympia sparta::StartupEvent(node, CREATE_SPARTA_HANDLER(Decode, sendInitialCredits_)); - VCSRs_.setVCSRs(p->init_vl, p->init_sew, p->init_lmul, p->init_vta); + VectorConfig_.setVCSRs(p->init_vl, 
p->init_sew, p->init_lmul, p->init_vta); } // Send fetch the initial credit count @@ -106,12 +106,12 @@ namespace olympia void Decode::receiveUopQueueCredits_(const uint32_t & credits) { uop_queue_credits_ += credits; - if (fetch_queue_.size() > 0) + if (fetch_queue_.size() + uop_queue_.size() > 0) { ev_decode_insts_event_.schedule(sparta::Clock::Cycle(0)); } - ILOG("Received credits: " << uop_queue_credits_in_); + ILOG("Received credits: " << credits << " " << uop_queue_credits_in_); } // Called when the fetch buffer was appended by Fetch. If decode @@ -131,37 +131,37 @@ namespace olympia } } - void Decode::updateVcsrs_(const InstPtr & inst) + void Decode::updateVectorConfig_(const InstPtr & inst) { - VCSRs_.setVCSRs(inst->getVL(), inst->getSEW(), inst->getLMUL(), inst->getVTA()); + VectorConfig_.setVCSRs(inst->getVL(), inst->getSEW(), inst->getLMUL(), inst->getVTA()); const uint64_t uid = inst->getOpCodeInfo()->getInstructionUniqueID(); if ((uid == mavis_vsetvli_uid_) && inst->hasZeroRegSource()) { // If rs1 is x0 and rd is x0 then the vl is unchanged (assuming it is legal) - VCSRs_.vl = inst->hasZeroRegDest() ? std::min(VCSRs_.vl, VCSRs_.vlmax) - : VCSRs_.vlmax; + VectorConfig_.vl = inst->hasZeroRegDest() ? std::min(VectorConfig_.vl, VectorConfig_.vlmax) + : VectorConfig_.vlmax; } ILOG("Processing vset{i}vl{i} instruction: " << inst); - ILOG(" LMUL: " << VCSRs_.lmul); - ILOG(" SEW: " << VCSRs_.sew); - ILOG(" VTA: " << VCSRs_.vta); - ILOG(" VLMAX: " << VCSRs_.vlmax); - ILOG(" VL: " << VCSRs_.vl); + ILOG(" LMUL: " << VectorConfig_.lmul); + ILOG(" SEW: " << VectorConfig_.sew); + ILOG(" VTA: " << VectorConfig_.vta); + ILOG(" VLMAX: " << VectorConfig_.vlmax); + ILOG(" VL: " << VectorConfig_.vl); // Check validity of vector config - sparta_assert(VCSRs_.lmul <= 8, - "LMUL (" << VCSRs_.lmul << ") cannot be greater than " << 8); - sparta_assert(VCSRs_.vl <= VCSRs_.vlmax, - "VL (" << VCSRs_.vl << ") cannot be greater than VLMAX ("<< VCSRs_.vlmax << ")"); + sparta_assert(VectorConfig_.lmul <= 8, + "LMUL (" << VectorConfig_.lmul << ") cannot be greater than " << 8); + sparta_assert(VectorConfig_.vl <= VectorConfig_.vlmax, + "VL (" << VectorConfig_.vl << ") cannot be greater than VLMAX ("<< VectorConfig_.vlmax << ")"); } // process vset settings being forward from execution pipe // for set instructions that depend on register void Decode::process_vset_(const InstPtr & inst) { - updateVcsrs_(inst); + updateVectorConfig_(inst); // if rs1 != 0, VL = x[rs1], so we assume there's an STF field for VL if (waiting_on_vset_) @@ -176,7 +176,7 @@ namespace olympia void Decode::handleFlush_(const FlushManager::FlushingCriteria & criteria) { ILOG("Got a flush call for " << criteria); - fetch_queue_credits_outp_.send(fetch_queue_.size()); + fetch_queue_credits_outp_.send(fetch_queue_.size() + uop_queue_.size()); fetch_queue_.clear(); // Reset the vector uop generator @@ -238,7 +238,7 @@ namespace olympia if ((uid == mavis_vsetivli_uid_) || ((uid == mavis_vsetvli_uid_) && inst->hasZeroRegSource())) { - updateVcsrs_(inst); + updateVectorConfig_(inst); } else if (uid == mavis_vsetvli_uid_ || uid == mavis_vsetvl_uid_) { @@ -253,7 +253,7 @@ namespace olympia if (!inst->isVset() && inst->isVector()) { // set LMUL, VSET, VL, VTA for any other vector instructions - inst->setVCSRs(&VCSRs_); + inst->setVectorConfigVCSRs(&VectorConfig_); } } @@ -264,14 +264,14 @@ namespace olympia { ILOG("Vector uop gen: " << inst); vec_uop_gen_->setInst(inst); - + // Even if LMUL == 1, we need the vector uop generator to create a uop for 
us // because some generators will add additional sources and destinations to the // instruction (e.g. widening, multiply-add, slides). while(vec_uop_gen_->getNumUopsRemaining() >= 1) { const InstPtr uop = vec_uop_gen_->generateUop(); - if (insts->size() < num_to_decode_) + if (insts->size() < num_to_decode) { insts->emplace_back(uop); uop->setStatus(Inst::Status::DECODED); @@ -341,6 +341,9 @@ namespace olympia // uint32_t unfusedInstsSize = insts->size(); // Decrement internal Uop Queue credits + ILOG(uop_queue_credits_) + ILOG(num_to_decode) + ILOG(insts->size()) sparta_assert(uop_queue_credits_ >= insts->size(), "Attempt to decrement d0q credits below what is available"); uop_queue_credits_ -= insts->size(); @@ -352,6 +355,7 @@ namespace olympia // instructions in the queue, schedule another decode session if (uop_queue_credits_ > 0 && (fetch_queue_.size() + uop_queue_.size()) > 0) { + ILOG("Scheduling decode event, instructions still left") ev_decode_insts_event_.schedule(1); } } diff --git a/core/Decode.hpp b/core/Decode.hpp index 209d30bf..6241f5de 100644 --- a/core/Decode.hpp +++ b/core/Decode.hpp @@ -333,7 +333,7 @@ namespace olympia //! \brief the fusion group definition files, JSON or (future) FSL const std::vector fusion_group_definitions_; - Inst::VCSRs VCSRs_; + Inst::VectorConfig VectorConfig_; MavisType* mavis_facade_; @@ -344,7 +344,7 @@ namespace olympia bool waiting_on_vset_; // Helper method to update VCSRs - void updateVcsrs_(const InstPtr &); + void updateVectorConfig_(const InstPtr &); ////////////////////////////////////////////////////////////////////// // Decoder callbacks diff --git a/core/Dispatch.cpp b/core/Dispatch.cpp index 74ce4a14..7a644ffe 100644 --- a/core/Dispatch.cpp +++ b/core/Dispatch.cpp @@ -117,6 +117,9 @@ namespace olympia // Special case for the LSU dispatchers_[static_cast(InstArchInfo::TargetPipe::LSU)].emplace_back( new Dispatcher("lsu", this, info_logger_, &in_lsu_credits_, &out_lsu_write_)); + // Special case for VLSU + dispatchers_[static_cast(InstArchInfo::TargetPipe::VLSU)].emplace_back( + new Dispatcher("vlsu", this, info_logger_, &in_vlsu_credits_, &out_vlsu_write_)); in_lsu_credits_.enableCollection(node); in_reorder_credits_.registerConsumerHandler( @@ -237,7 +240,7 @@ namespace olympia "pipe. 
Did you define it in the yaml properly?"); // so we have a map here that checks for which valid dispatchers for that // instruction target pipe map needs to be: "int": [exe0, exe1, exe2] - if (target_pipe != InstArchInfo::TargetPipe::LSU) + if (!ex_inst_ptr->isLoadStoreInst()) { uint32_t max_credits = 0; olympia::Dispatcher* best_dispatcher = nullptr; diff --git a/core/Dispatch.hpp b/core/Dispatch.hpp index 1f94c0f9..c58202c5 100644 --- a/core/Dispatch.hpp +++ b/core/Dispatch.hpp @@ -86,6 +86,10 @@ namespace olympia sparta::SchedulingPhase::Tick, 0}; sparta::DataOutPort out_lsu_write_{&unit_port_set_, "out_lsu_write", false}; + sparta::DataInPort in_vlsu_credits_{&unit_port_set_, "in_vlsu_credits", + sparta::SchedulingPhase::Tick, 0}; + sparta::DataOutPort out_vlsu_write_{&unit_port_set_, "out_vlsu_write", + false}; sparta::DataInPort in_reorder_credits_{ &unit_port_set_, "in_reorder_buffer_credits", sparta::SchedulingPhase::Tick, 0}; sparta::DataOutPort out_reorder_write_{&unit_port_set_, @@ -184,6 +188,8 @@ namespace olympia sparta::Counter::COUNT_NORMAL, getClock()), sparta::CycleCounter(getStatisticSet(), "stall_vset_busy", "VSET busy", sparta::Counter::COUNT_NORMAL, getClock()), + sparta::CycleCounter(getStatisticSet(), "stall_vlsu_busy", "VLSU busy", + sparta::Counter::COUNT_NORMAL, getClock()), sparta::CycleCounter(getStatisticSet(), "stall_sys_busy", "No credits from ROB", sparta::Counter::COUNT_NORMAL, getClock()), sparta::CycleCounter(getStatisticSet(), "stall_not_stalled", @@ -223,6 +229,8 @@ namespace olympia sparta::Counter::COUNT_NORMAL), sparta::Counter(getStatisticSet(), "count_vset_insts", "Total VSET insts", sparta::Counter::COUNT_NORMAL), + sparta::Counter(getStatisticSet(), "count_vlsu_insts", "Total VLSU insts", + sparta::Counter::COUNT_NORMAL), sparta::Counter(getStatisticSet(), "count_sys_insts", "Total SYS insts", sparta::Counter::COUNT_NORMAL)}}; diff --git a/core/Inst.hpp b/core/Inst.hpp index 3853af12..191125d7 100644 --- a/core/Inst.hpp +++ b/core/Inst.hpp @@ -77,12 +77,15 @@ namespace olympia static const uint32_t VLEN = 1024; // vector register default bit size // Vector CSRs - struct VCSRs + struct VectorConfig { uint32_t vl = 16; // vector length uint32_t sew = 8; // set element width uint32_t lmul = 1; // effective length bool vta = false; // vector tail agnostic, false = undisturbed, true = agnostic + uint32_t mop = 0; + uint32_t eew = 0; + uint32_t stride = 0; uint32_t vlmax_formula() { return (VLEN / sew) * lmul; } @@ -98,6 +101,13 @@ namespace olympia vlmax = vlmax_formula(); } + void setVLSU(uint32_t input_eew, uint32_t input_stride, uint32_t input_mop) + { + eew = input_eew; + stride = input_stride; + mop = input_mop; + } + uint32_t vlmax = vlmax_formula(); }; @@ -158,7 +168,7 @@ namespace olympia } } } - + void attemptRetire() { ev_retire_->schedule(); } const Status & getStatus() const { return status_state_; } bool getCompletedStatus() const { return getStatus() == olympia::Inst::Status::COMPLETED; } @@ -244,44 +254,71 @@ namespace olympia void setTargetVAddr(sparta::memory::addr_t target_vaddr) { target_vaddr_ = target_vaddr; } sparta::memory::addr_t getTargetVAddr() const { return target_vaddr_; } - void setVCSRs(const VCSRs * input_VCSRs) + void setVectorConfigVCSRs(const VectorConfig * input_VectorConfig) { - VCSRs_ = *input_VCSRs; + // we only want to update the VCSRs of the VectorConfig + VectorConfigs_.setVCSRs(input_VectorConfig->vl, input_VectorConfig->sew, input_VectorConfig->lmul, input_VectorConfig->vta); } - const VCSRs * getVCSRs() 
const { return &VCSRs_; } + void setVectorConfigVLSU(const VectorConfig * input_VectorConfig) + { + // we only want to update the VCSRs of the VectorConfig + VectorConfigs_.setVLSU(input_VectorConfig->eew, input_VectorConfig->stride, input_VectorConfig->mop); + } + + const VectorConfig * getVectorConfig() const { return &VectorConfigs_; } // Set lmul from vset (vsetivli, vsetvli) void setLMUL(uint32_t lmul) { - VCSRs_.lmul = lmul; - VCSRs_.vlmax = VCSRs_.vlmax_formula(); + VectorConfigs_.lmul = lmul; + VectorConfigs_.vlmax = VectorConfigs_.vlmax_formula(); } // Set sew from vset (vsetivli, vsetvli) void setSEW(uint32_t sew) { - VCSRs_.sew = sew; - VCSRs_.vlmax = VCSRs_.vlmax_formula(); + VectorConfigs_.sew = sew; + VectorConfigs_.vlmax = VectorConfigs_.vlmax_formula(); } // Set VL from vset (vsetivli, vsetvli) - void setVL(uint32_t vl) { VCSRs_.vl = vl; } + void setVL(uint32_t vl) { VectorConfigs_.vl = vl; } + + // Set EEW from vlsu operation + void setEEW(uint32_t eew) { VectorConfigs_.eew = eew; } + // Set MOP from vlsu operation + void setMOP(uint32_t mop) { VectorConfigs_.mop = mop; } + // Set stride from vlsu operation + void setStride(uint32_t stride) { VectorConfigs_.stride = stride; } // Set VTA (vector tail agnostic) // vta = true means agnostic, set destination values to 1's or maintain original // vta = false means undisturbed, maintain original destination values - void setVTA(bool vta) { VCSRs_.vta = vta; } + void setVTA(bool vta) { VectorConfigs_.vta = vta; } - uint32_t getSEW() const { return VCSRs_.sew; } - uint32_t getLMUL() const { return VCSRs_.lmul; } - uint32_t getVL() const { return VCSRs_.vl; } - uint32_t getVTA() const { return VCSRs_.vta; } - uint32_t getVLMAX() const { return VCSRs_.vlmax; } + uint32_t getSEW() const { return VectorConfigs_.sew; } + uint32_t getLMUL() const { return VectorConfigs_.lmul; } + uint32_t getVL() const { return VectorConfigs_.vl; } + + uint32_t getMOP() const { return VectorConfigs_.mop; } + uint32_t getStride() const { return VectorConfigs_.stride; } + uint32_t getEEW() const { return VectorConfigs_.eew; } + + uint32_t getVTA() const { return VectorConfigs_.vta; } + uint32_t getVLMAX() const { return VectorConfigs_.vlmax; } void setTail(bool has_tail) { has_tail_ = has_tail; } bool hasTail() const { return has_tail_; } + uint32_t getTotalVLSUIters(){ return vlsu_total_iters_; } + + uint32_t getCurrVLSUIters(){ return vlsu_curr_iters_; } + + void setTotalVLSUIters(uint32_t vlsu_total_iters){ vlsu_total_iters_ = vlsu_total_iters; } + + void setCurrVLSUIters(uint32_t vlsu_curr_iters){ vlsu_curr_iters_ = vlsu_curr_iters; } + void setUOpParent(sparta::SpartaWeakPointer & parent_uop) { parent_uop_ = parent_uop; @@ -487,9 +524,12 @@ namespace olympia const bool is_return_; const bool has_immediate_; - VCSRs VCSRs_; + VectorConfig VectorConfigs_; bool has_tail_ = false; // Does this vector uop have a tail? + uint32_t vlsu_total_iters_ = 0; + uint32_t vlsu_curr_iters_ = 0; + // blocking vset is a vset that needs to read a value from a register value. 
A blocking vset // can't be resolved until after execution, so we need to block on it due to UOp fracturing bool is_blocking_vset_ = false; diff --git a/core/InstArchInfo.cpp b/core/InstArchInfo.cpp index dca1a980..00a3a24b 100644 --- a/core/InstArchInfo.cpp +++ b/core/InstArchInfo.cpp @@ -6,23 +6,24 @@ namespace olympia { const InstArchInfo::TargetPipeMap InstArchInfo::execution_pipe_map = { - {"br", InstArchInfo::TargetPipe::BR}, - {"cmov", InstArchInfo::TargetPipe::CMOV}, - {"div", InstArchInfo::TargetPipe::DIV}, + {"br", InstArchInfo::TargetPipe::BR}, + {"cmov", InstArchInfo::TargetPipe::CMOV}, + {"div", InstArchInfo::TargetPipe::DIV}, {"faddsub", InstArchInfo::TargetPipe::FADDSUB}, {"float", InstArchInfo::TargetPipe::FLOAT}, - {"fmac", InstArchInfo::TargetPipe::FMAC}, - {"i2f", InstArchInfo::TargetPipe::I2F}, - {"f2i", InstArchInfo::TargetPipe::F2I}, - {"int", InstArchInfo::TargetPipe::INT}, - {"lsu", InstArchInfo::TargetPipe::LSU}, - {"mul", InstArchInfo::TargetPipe::MUL}, - {"vint", InstArchInfo::TargetPipe::VINT}, + {"fmac", InstArchInfo::TargetPipe::FMAC}, + {"i2f", InstArchInfo::TargetPipe::I2F}, + {"f2i", InstArchInfo::TargetPipe::F2I}, + {"int", InstArchInfo::TargetPipe::INT}, + {"lsu", InstArchInfo::TargetPipe::LSU}, + {"mul", InstArchInfo::TargetPipe::MUL}, + {"vint", InstArchInfo::TargetPipe::VINT}, {"vmask", InstArchInfo::TargetPipe::VMASK}, - {"vset", InstArchInfo::TargetPipe::VSET}, - {"vmul", InstArchInfo::TargetPipe::VMUL}, - {"vdiv", InstArchInfo::TargetPipe::VDIV}, - {"sys", InstArchInfo::TargetPipe::SYS}, + {"vset", InstArchInfo::TargetPipe::VSET}, + {"vmul", InstArchInfo::TargetPipe::VMUL}, + {"vlsu", InstArchInfo::TargetPipe::VLSU}, + {"vdiv", InstArchInfo::TargetPipe::VDIV}, + {"sys", InstArchInfo::TargetPipe::SYS}, {"?", InstArchInfo::TargetPipe::UNKNOWN} }; @@ -37,6 +38,7 @@ namespace olympia {InstArchInfo::TargetPipe::F2I, "F2I"}, {InstArchInfo::TargetPipe::INT, "INT"}, {InstArchInfo::TargetPipe::LSU, "LSU"}, + {InstArchInfo::TargetPipe::VLSU, "VLSU"}, {InstArchInfo::TargetPipe::MUL, "MUL"}, {InstArchInfo::TargetPipe::VINT, "VINT"}, {InstArchInfo::TargetPipe::VMASK, "VMASK"}, @@ -83,7 +85,16 @@ namespace olympia uop_gen_ = itr->second; } - is_load_store_ = (tgt_pipe_ == TargetPipe::LSU); + if (jobj.find("uop_gen") != jobj.end()) + { + auto uop_gen_name = jobj["uop_gen"].get(); + const auto itr = uop_gen_type_map.find(uop_gen_name); + sparta_assert(itr != uop_gen_type_map.end(), + "Unknown uop gen: " << uop_gen_name << " for inst: " + << jobj["mnemonic"].get()); + uop_gen_ = itr->second; + } + is_load_store_ = (tgt_pipe_ == TargetPipe::LSU || tgt_pipe_ == TargetPipe::VLSU); is_vset_ = {tgt_pipe_ == TargetPipe::VSET}; } diff --git a/core/InstArchInfo.hpp b/core/InstArchInfo.hpp index 8ad855a5..77ef85b9 100644 --- a/core/InstArchInfo.hpp +++ b/core/InstArchInfo.hpp @@ -53,6 +53,7 @@ namespace olympia VMUL, VDIV, VSET, + VLSU, SYS, UNKNOWN }; diff --git a/core/InstGenerator.cpp b/core/InstGenerator.cpp index d4998e38..711f1c57 100644 --- a/core/InstGenerator.cpp +++ b/core/InstGenerator.cpp @@ -77,97 +77,103 @@ namespace olympia // Get the JSON record at the current index nlohmann::json jinst = jobj_->at(curr_inst_index_); - InstPtr inst; - if (jinst.find("opcode") != jinst.end()) + + if (jinst.find("mnemonic") == jinst.end()) { - uint64_t opcode = std::strtoull(jinst["opcode"].get().c_str(), nullptr, 0); - inst = mavis_facade_->makeInst(opcode, clk); + throw sparta::SpartaException() << "Missing mnemonic at " << curr_inst_index_; } - else - { - if 
(jinst.find("mnemonic") == jinst.end()) - { - throw sparta::SpartaException() << "Missing mnemonic at " << curr_inst_index_; - } - const std::string mnemonic = jinst["mnemonic"]; + const std::string mnemonic = jinst["mnemonic"]; - auto addElement = [&jinst](mavis::OperandInfo & operands, const std::string & key, - const mavis::InstMetaData::OperandFieldID operand_field_id, - const mavis::InstMetaData::OperandTypes operand_type) - { - if (jinst.find(key) != jinst.end()) - { - operands.addElement(operand_field_id, operand_type, jinst[key].get()); - } - }; - - mavis::OperandInfo srcs; - addElement(srcs, "rs1", mavis::InstMetaData::OperandFieldID::RS1, - mavis::InstMetaData::OperandTypes::LONG); - addElement(srcs, "fs1", mavis::InstMetaData::OperandFieldID::RS1, - mavis::InstMetaData::OperandTypes::DOUBLE); - addElement(srcs, "rs2", mavis::InstMetaData::OperandFieldID::RS2, - mavis::InstMetaData::OperandTypes::LONG); - addElement(srcs, "fs2", mavis::InstMetaData::OperandFieldID::RS2, - mavis::InstMetaData::OperandTypes::DOUBLE); - addElement(srcs, "vs1", mavis::InstMetaData::OperandFieldID::RS1, - mavis::InstMetaData::OperandTypes::VECTOR); - addElement(srcs, "vs2", mavis::InstMetaData::OperandFieldID::RS2, - mavis::InstMetaData::OperandTypes::VECTOR); - - mavis::OperandInfo dests; - addElement(dests, "rd", mavis::InstMetaData::OperandFieldID::RD, - mavis::InstMetaData::OperandTypes::LONG); - addElement(dests, "fd", mavis::InstMetaData::OperandFieldID::RD, - mavis::InstMetaData::OperandTypes::DOUBLE); - addElement(dests, "vd", mavis::InstMetaData::OperandFieldID::RD, - mavis::InstMetaData::OperandTypes::VECTOR); - - if (jinst.find("imm") != jinst.end()) - { - const uint64_t imm = jinst["imm"].get(); - mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests, imm); - inst = mavis_facade_->makeInstDirectly(ex_info, clk); - } - else + auto addElement = [&jinst](mavis::OperandInfo & operands, const std::string & key, + const mavis::InstMetaData::OperandFieldID operand_field_id, + const mavis::InstMetaData::OperandTypes operand_type) + { + if (jinst.find(key) != jinst.end()) { - mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests); - inst = mavis_facade_->makeInstDirectly(ex_info, clk); + operands.addElement(operand_field_id, operand_type, jinst[key].get()); } + }; - if (jinst.find("vaddr") != jinst.end()) - { - uint64_t vaddr = std::strtoull(jinst["vaddr"].get().c_str(), nullptr, 0); - inst->setTargetVAddr(vaddr); - } - if (jinst.find("vtype") != jinst.end()) - { - // immediate, so decode from hex - uint64_t vtype = std::strtoull(jinst["vtype"].get().c_str(), nullptr, 0); - std::string binaryString = std::bitset<32>(vtype).to_string(); - uint32_t sew = std::pow(2, std::stoi(binaryString.substr(26, 3), nullptr, 2)) * 8; - uint32_t lmul = std::pow(2, std::stoi(binaryString.substr(29, 3), nullptr, 2)); - inst->setLMUL(lmul); - inst->setSEW(sew); - } + mavis::OperandInfo srcs; + addElement(srcs, "rs1", mavis::InstMetaData::OperandFieldID::RS1, + mavis::InstMetaData::OperandTypes::LONG); + addElement(srcs, "fs1", mavis::InstMetaData::OperandFieldID::RS1, + mavis::InstMetaData::OperandTypes::DOUBLE); + addElement(srcs, "rs2", mavis::InstMetaData::OperandFieldID::RS2, + mavis::InstMetaData::OperandTypes::LONG); + addElement(srcs, "fs2", mavis::InstMetaData::OperandFieldID::RS2, + mavis::InstMetaData::OperandTypes::DOUBLE); + addElement(srcs, "vs1", mavis::InstMetaData::OperandFieldID::RS1, + mavis::InstMetaData::OperandTypes::VECTOR); + addElement(srcs, "vs2", 
mavis::InstMetaData::OperandFieldID::RS2, + mavis::InstMetaData::OperandTypes::VECTOR); + addElement(srcs, "vs3", mavis::InstMetaData::OperandFieldID::RS3, + mavis::InstMetaData::OperandTypes::VECTOR); - if (jinst.find("vta") != jinst.end()) - { - const bool vta = jinst["vta"].get() > 0 ? true: false; - inst->setVTA(vta); - } + mavis::OperandInfo dests; + addElement(dests, "rd", mavis::InstMetaData::OperandFieldID::RD, + mavis::InstMetaData::OperandTypes::LONG); + addElement(dests, "fd", mavis::InstMetaData::OperandFieldID::RD, + mavis::InstMetaData::OperandTypes::DOUBLE); + addElement(dests, "vd", mavis::InstMetaData::OperandFieldID::RD, + mavis::InstMetaData::OperandTypes::VECTOR); - if (jinst.find("vl") != jinst.end()) - { - const uint64_t vl = jinst["vl"].get(); - inst->setVL(vl); - } - - if (jinst.find("taken") != jinst.end()) - { - const bool taken = jinst["taken"].get(); - inst->setTakenBranch(taken); - } + InstPtr inst; + if (jinst.find("imm") != jinst.end()) + { + const uint64_t imm = jinst["imm"].get(); + mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests, imm); + inst = mavis_facade_->makeInstDirectly(ex_info, clk); + } + else + { + mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests); + inst = mavis_facade_->makeInstDirectly(ex_info, clk); + } + if (jinst.find("vaddr") != jinst.end()) + { + uint64_t vaddr = std::strtoull(jinst["vaddr"].get().c_str(), nullptr, 0); + inst->setTargetVAddr(vaddr); + } + if (jinst.find("vtype") != jinst.end()) + { + // immediate, so decode from hex + uint64_t vtype = std::strtoull(jinst["vtype"].get().c_str(), nullptr, 0); + std::string binaryString = std::bitset<32>(vtype).to_string(); + uint32_t sew = std::pow(2, std::stoi(binaryString.substr(26, 3), nullptr, 2)) * 8; + uint32_t lmul = std::pow(2, std::stoi(binaryString.substr(29, 3), nullptr, 2)); + inst->setLMUL(lmul); + inst->setSEW(sew); + } + if (jinst.find("vta") != jinst.end()) + { + const bool vta = jinst["vta"].get() > 0 ? 
true: false; + inst->setVTA(vta); + } + if (jinst.find("vl") != jinst.end()) + { + const uint64_t vl = jinst["vl"].get(); + inst->setVL(vl); + } + if (jinst.find("mop") != jinst.end()) + { + const uint64_t mop = jinst["mop"].get(); + inst->setMOP(mop); + } + if (jinst.find("eew") != jinst.end()) + { + const uint64_t eew = jinst["eew"].get(); + inst->setEEW(eew); + } + if (jinst.find("stride") != jinst.end()) + { + const uint64_t stride = jinst["stride"].get(); + inst->setStride(stride); + } + if (jinst.find("taken") != jinst.end()) + { + const bool taken = jinst["taken"].get(); + inst->setTakenBranch(taken); } inst->setRewindIterator(curr_inst_index_); @@ -268,4 +274,4 @@ namespace olympia return nullptr; } -} // namespace olympia +} // namespace olympia \ No newline at end of file diff --git a/core/LSU.cpp b/core/LSU.cpp index 2324cddd..95f748e3 100644 --- a/core/LSU.cpp +++ b/core/LSU.cpp @@ -15,6 +15,7 @@ namespace olympia LSU::LSU(sparta::TreeNode* node, const LSUParameterSet* p) : sparta::Unit(node), + //data_width_(p->data_width), ldst_inst_queue_("lsu_inst_queue", p->ldst_inst_queue_size, getClock()), ldst_inst_queue_size_(p->ldst_inst_queue_size), replay_buffer_("replay_buffer", p->replay_buffer_size, getClock()), @@ -258,7 +259,7 @@ namespace olympia { sparta_assert(inst_ptr->getStatus() == Inst::Status::RETIRED, "Get ROB Ack, but the store inst hasn't retired yet!"); - + sparta_assert(!inst_ptr->isVector(), "Vector instruction is being processed by LSU, error!") ++stores_retired_; updateIssuePriorityAfterStoreInstRetire_(inst_ptr); @@ -1226,6 +1227,7 @@ namespace olympia // Update issue priority after store instruction retires void LSU::updateIssuePriorityAfterStoreInstRetire_(const InstPtr & inst_ptr) { + sparta_assert(!inst_ptr->isVector(), "Vector Instruction got into LSU, error!") for (auto & inst_info_ptr : ldst_inst_queue_) { if (inst_info_ptr->getInstPtr() == inst_ptr) @@ -1233,8 +1235,8 @@ namespace olympia if (inst_info_ptr->getState() != LoadStoreInstInfo::IssueState::ISSUED) // Speculative misses are marked as - // not ready and replay event would - // set them back to ready + // not ready and replay event would + // set them back to ready { inst_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); } diff --git a/core/LoadStoreInstInfo.hpp b/core/LoadStoreInstInfo.hpp index 3f9151cc..068ebe25 100644 --- a/core/LoadStoreInstInfo.hpp +++ b/core/LoadStoreInstInfo.hpp @@ -71,11 +71,19 @@ namespace olympia return mem_access_info_ptr == nullptr ? 0 : mem_access_info_ptr->getInstUniqueID(); } + // This is a function which will be added in the SPARTA_ADDPAIRs API. + uint64_t getInstUOpID() const + { + const MemoryAccessInfoPtr & mem_access_info_ptr = getMemoryAccessInfoPtr(); + return mem_access_info_ptr == nullptr ? 0 : mem_access_info_ptr->getInstUOpID(); + } + // Get the mnemonic of the instruction this load/store is // associated. Will return if not associated - std::string getMnemonic() const { - return (mem_access_info_ptr_ != nullptr ? - mem_access_info_ptr_->getMnemonic() : ""); + std::string getMnemonic() const + { + return (mem_access_info_ptr_ != nullptr ? 
mem_access_info_ptr_->getMnemonic() + : ""); } void setPriority(const IssuePriority & rank) { rank_.setValue(rank); } @@ -90,6 +98,10 @@ namespace olympia bool isRetired() const { return getInstPtr()->getStatus() == Inst::Status::RETIRED; } + void setIsLastMemOp(bool is_last_mem_op) { is_last_mem_op_ = is_last_mem_op; } + + bool isLastMemOp() const { return is_last_mem_op_; } + bool winArb(const LoadStoreInstInfoPtr & that) const { if (that == nullptr) @@ -127,14 +139,31 @@ namespace olympia friend bool operator<(const LoadStoreInstInfoPtr & lhs, const LoadStoreInstInfoPtr & rhs) { - return lhs->getInstUniqueID() < rhs->getInstUniqueID(); + if (lhs->getInstUniqueID() == rhs->getInstUniqueID()) + { + // if UID is the same, check Uops for vector + return lhs->getInstUOpID() < rhs->getInstUOpID(); + } + else + { + return lhs->getInstUniqueID() < rhs->getInstUniqueID(); + } } + void setVLSUStatusState(Inst::Status vlsu_status_state) + { + vlsu_status_state_ = vlsu_status_state; + } + + Inst::Status getVLSUStatusState() { return vlsu_status_state_; } + private: MemoryAccessInfoPtr mem_access_info_ptr_; sparta::State rank_; sparta::State state_; bool in_ready_queue_; + bool is_last_mem_op_ = false; + Inst::Status vlsu_status_state_; }; // class LoadStoreInstInfo using LoadStoreInstInfoAllocator = sparta::SpartaSharedPointerAllocator; @@ -196,7 +225,7 @@ namespace olympia { os << "lsinfo: " << "uid: " << ls_info.getInstUniqueID() << " pri:" << ls_info.getPriority() - << " state: " << ls_info.getState(); + << "uopid: " << ls_info.getInstUOpID() << " state: " << ls_info.getState(); return os; } @@ -219,18 +248,20 @@ namespace olympia */ class LoadStoreInstInfoPair : public sparta::PairDefinition { - public: - - // The SPARTA_ADDPAIRs APIs must be called during the construction of the PairDefinition class - LoadStoreInstInfoPair() : sparta::PairDefinition() { + public: + // The SPARTA_ADDPAIRs APIs must be called during the construction of the PairDefinition + // class + LoadStoreInstInfoPair() : sparta::PairDefinition() + { SPARTA_INVOKE_PAIRS(LoadStoreInstInfo); } - SPARTA_REGISTER_PAIRS(SPARTA_ADDPAIR("DID", &LoadStoreInstInfo::getInstUniqueID), // Used by Argos to color code - SPARTA_ADDPAIR("uid", &LoadStoreInstInfo::getInstUniqueID), - SPARTA_ADDPAIR("mnemonic", &LoadStoreInstInfo::getMnemonic), - SPARTA_ADDPAIR("pri:", &LoadStoreInstInfo::getPriority), - SPARTA_ADDPAIR("state", &LoadStoreInstInfo::getState)) + SPARTA_REGISTER_PAIRS( + SPARTA_ADDPAIR("DID", + &LoadStoreInstInfo::getInstUniqueID), // Used by Argos to color code + SPARTA_ADDPAIR("uid", &LoadStoreInstInfo::getInstUniqueID), + SPARTA_ADDPAIR("mnemonic", &LoadStoreInstInfo::getMnemonic), + SPARTA_ADDPAIR("pri:", &LoadStoreInstInfo::getPriority), + SPARTA_ADDPAIR("state", &LoadStoreInstInfo::getState)) }; - } // namespace olympia diff --git a/core/MMU.cpp b/core/MMU.cpp index 5acf6f5b..339d5c49 100644 --- a/core/MMU.cpp +++ b/core/MMU.cpp @@ -81,7 +81,12 @@ namespace olympia uev_lookup_inst_.schedule(sparta::Clock::Cycle(mmu_latency_)); } } - out_lsu_lookup_ack_.send(memory_access_info_ptr); + if(memory_access_info_ptr->isVector()){ + out_vlsu_lookup_ack_.send(memory_access_info_ptr); + } + else{ + out_lsu_lookup_ack_.send(memory_access_info_ptr); + } } // TLB ready for memory access @@ -89,7 +94,12 @@ namespace olympia { busy_ = false; reloadTLB_(mmu_pending_inst_->getInstPtr()->getTargetVAddr()); - out_lsu_lookup_req_.send(mmu_pending_inst_); + if(mmu_pending_inst_->isVector()){ + out_vlsu_lookup_req_.send(mmu_pending_inst_); + } 
+ else{ + out_lsu_lookup_req_.send(mmu_pending_inst_); + } } } // namespace olympia diff --git a/core/MMU.hpp b/core/MMU.hpp index c4e4ebc3..f0caac3b 100644 --- a/core/MMU.hpp +++ b/core/MMU.hpp @@ -62,6 +62,15 @@ namespace olympia { sparta::DataOutPort out_lsu_lookup_req_ {&unit_port_set_, "out_lsu_lookup_req", 1}; + + sparta::SignalOutPort out_vlsu_free_req_ + {&unit_port_set_, "out_vlsu_free_req", 0}; + + sparta::DataOutPort out_vlsu_lookup_ack_ + {&unit_port_set_, "out_vlsu_lookup_ack", 0}; + + sparta::DataOutPort out_vlsu_lookup_req_ + {&unit_port_set_, "out_vlsu_lookup_req", 1}; //////////////////////////////////////////////////////////////////////////////// // Events diff --git a/core/MemoryAccessInfo.hpp b/core/MemoryAccessInfo.hpp index f0d2cf6b..94738802 100644 --- a/core/MemoryAccessInfo.hpp +++ b/core/MemoryAccessInfo.hpp @@ -106,13 +106,22 @@ namespace olympia return inst_ptr == nullptr ? 0 : inst_ptr->getUniqueID(); } + // This is a function which will be added in the SPARTA_ADDPAIRs API. + uint64_t getInstUOpID() const + { + const InstPtr & inst_ptr = getInstPtr(); + return inst_ptr == nullptr ? 0 : inst_ptr->getUOpID(); + } + void setPhyAddrStatus(bool is_ready) { phy_addr_ready_ = is_ready; } bool getPhyAddrStatus() const { return phy_addr_ready_; } uint64_t getPhyAddr() const { return ldst_inst_ptr_->getRAdr(); } - sparta::memory::addr_t getVAddr() const { return ldst_inst_ptr_->getTargetVAddr(); } + sparta::memory::addr_t getVAddr() const { return vaddr_; } + + void setVAddr(sparta::memory::addr_t vaddr) { vaddr_ = vaddr; } void setSrcUnit(const ArchUnit & src_unit) { src_ = src_unit; } @@ -171,6 +180,7 @@ namespace olympia mshr_entry_info_iterator_ = iter; } + bool isVector(){ return getInstPtr()->isVector(); } private: // load/store instruction pointer InstPtr ldst_inst_ptr_; @@ -199,6 +209,8 @@ namespace olympia LoadStoreInstIterator issue_queue_iterator_; LoadStoreInstIterator replay_queue_iterator_; MSHREntryInfoIterator mshr_entry_info_iterator_; + + sparta::memory::addr_t vaddr_; }; using MemoryAccessInfoPtr = sparta::SpartaSharedPointer; @@ -279,7 +291,14 @@ namespace olympia inline std::ostream & operator<<(std::ostream & os, const olympia::MemoryAccessInfo & mem) { - os << "memptr: " << mem.getInstPtr(); + if(mem.getInstPtr()->isVector()) + { + os << "memptr: " << mem.getInstPtr() << " vaddr: " << mem.getVAddr(); + } + else + { + os << "memptr: " << mem.getInstPtr(); + } return os; } diff --git a/core/ROB.cpp b/core/ROB.cpp index a262b136..f45226c6 100644 --- a/core/ROB.cpp +++ b/core/ROB.cpp @@ -112,6 +112,7 @@ namespace olympia void ROB::retireInstructions_() { + ILOG("Retiring") // ROB is expecting a flush (back to itself) if (expect_flush_) { @@ -130,25 +131,31 @@ namespace olympia auto & ex_inst = *ex_inst_ptr; sparta_assert(ex_inst.isSpeculative() == false, "Uh, oh! 
A speculative instruction is being retired: " << ex_inst); - if (ex_inst.getStatus() == Inst::Status::COMPLETED) { // UPDATE: ex_inst.setStatus(Inst::Status::RETIRED); - if (ex_inst.isStoreInst()) - { + if (ex_inst.isStoreInst() && !ex_inst.isVector()) { + // We don't send signal back for vector because + // statuses are held by load_store_info_ptr, not inst_ptr + // like in LSU out_rob_retire_ack_.send(ex_inst_ptr); } + // sending retired instruction to rename out_rob_retire_ack_rename_.send(ex_inst_ptr); - // All instructions count as 1 uop ++num_uops_retired_; if (ex_inst_ptr->getUOpID() == 0) { ++num_retired_; ++retired_this_cycle; - + ILOG( "\nIncrementing" << + "\n expected: " << expected_program_id_ << + "\n received: " << ex_inst.getProgramID() << + "\n UID: " << ex_inst_ptr->getMavisUid() << + "\n incr: " << ex_inst_ptr->getProgramIDIncrement() << + "\n inst " << ex_inst) // Use the program ID to verify that the program order has been maintained. sparta_assert(ex_inst.getProgramID() == expected_program_id_, "\nUnexpected program ID when retiring instruction" << @@ -163,7 +170,6 @@ namespace olympia // were eliminated and adjusts the progID as needed expected_program_id_ += ex_inst.getProgramIDIncrement(); } - reorder_buffer_.pop(); ILOG("retiring " << ex_inst); diff --git a/core/ROB.hpp b/core/ROB.hpp index 23ab29ec..48e1360f 100644 --- a/core/ROB.hpp +++ b/core/ROB.hpp @@ -103,6 +103,7 @@ namespace olympia sparta::DataOutPort out_retire_flush_ {&unit_port_set_, "out_retire_flush"}; // UPDATE: sparta::DataOutPort out_rob_retire_ack_ {&unit_port_set_, "out_rob_retire_ack"}; + sparta::DataOutPort out_rob_retire_ack_vlsu_ {&unit_port_set_, "out_rob_retire_ack_vlsu"}; sparta::DataOutPort out_rob_retire_ack_rename_ {&unit_port_set_, "out_rob_retire_ack_rename"}; // For flush diff --git a/core/Rename.cpp b/core/Rename.cpp index b6091e0a..91ea7fc0 100644 --- a/core/Rename.cpp +++ b/core/Rename.cpp @@ -143,6 +143,7 @@ namespace olympia { sparta_assert(inst_ptr->getStatus() == Inst::Status::RETIRED, "Get ROB Ack, but the inst hasn't retired yet!"); + // loop through all Uops, mark dest/srcs accordingly auto const & dests = inst_ptr->getDestOpInfoList(); if (dests.size() > 0) { @@ -195,6 +196,7 @@ namespace olympia freelist_[src.rf].push(src.val); } } + // Instruction queue bookkeeping if (SPARTA_EXPECT_TRUE(!inst_queue_.empty())) { @@ -443,7 +445,8 @@ namespace olympia { // check for data operand existing based on RS2 existence // store data register info separately - if (src.field_id == mavis::InstMetaData::OperandFieldID::RS2) + // for vector, data operand is in RS3 + if (src.field_id == mavis::InstMetaData::OperandFieldID::RS2 || src.field_id == mavis::InstMetaData::OperandFieldID::RS3) { auto & bitmask = renaming_inst->getDataRegisterBitMask(rf); const uint32_t prf = map_table_[rf][num]; diff --git a/core/VLSU.cpp b/core/VLSU.cpp new file mode 100644 index 00000000..6705807a --- /dev/null +++ b/core/VLSU.cpp @@ -0,0 +1,1467 @@ +#include "sparta/utils/SpartaAssert.hpp" +#include "CoreUtils.hpp" +#include "VLSU.hpp" +#include "sparta/simulation/Unit.hpp" +#include +#include "Decode.hpp" + +#include "OlympiaAllocators.hpp" + +namespace olympia +{ + const char VLSU::name[] = "VLSU"; + + //////////////////////////////////////////////////////////////////////////////// + // Constructor + //////////////////////////////////////////////////////////////////////////////// + + VLSU::VLSU(sparta::TreeNode* node, const VLSUParameterSet* p) : + sparta::Unit(node), + 
mem_request_queue_("mem_request_queue", p->mem_request_queue_size, getClock()), + inst_queue_("VLSUInstQueue", p->mem_request_queue_size, node->getClock(), &unit_stat_set_), + mem_request_queue_size_(p->mem_request_queue_size), + inst_queue_size_(p->inst_queue_size), + replay_buffer_("replay_buffer", p->replay_buffer_size, getClock()), + replay_buffer_size_(p->replay_buffer_size), + replay_issue_delay_(p->replay_issue_delay), + ready_queue_(), + data_width_(p->data_width), + load_store_info_allocator_(sparta::notNull(OlympiaAllocators::getOlympiaAllocators(node)) + ->load_store_info_allocator), + memory_access_allocator_(sparta::notNull(OlympiaAllocators::getOlympiaAllocators(node)) + ->memory_access_allocator), + address_calculation_stage_(0), + mmu_lookup_stage_(address_calculation_stage_ + p->mmu_lookup_stage_length), + cache_lookup_stage_(mmu_lookup_stage_ + p->cache_lookup_stage_length), + cache_read_stage_(cache_lookup_stage_ + + 1), // Get data from the cache in the cycle after cache lookup + complete_stage_( + cache_read_stage_ + + p->cache_read_stage_length), // Complete stage is after the cache read stage + ldst_pipeline_("LoadStorePipeline", (complete_stage_ + 1), + getClock()), // complete_stage_ + 1 is number of stages + allow_speculative_load_exec_(p->allow_speculative_load_exec) + { + sparta_assert(p->mmu_lookup_stage_length > 0, + "MMU lookup stage should atleast be one cycle"); + sparta_assert(p->cache_read_stage_length > 0, + "Cache read stage should atleast be one cycle"); + sparta_assert(p->cache_lookup_stage_length > 0, + "Cache lookup stage should atleast be one cycle"); + + // Pipeline collection config + ldst_pipeline_.enableCollection(node); + mem_request_queue_.enableCollection(node); + replay_buffer_.enableCollection(node); + + // Startup handler for sending initial credits + sparta::StartupEvent(node, CREATE_SPARTA_HANDLER(VLSU, sendInitialCredits_)); + + // Port config + in_vlsu_insts_.registerConsumerHandler( + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, getInstsFromDispatch_, InstPtr)); + + in_rob_retire_ack_.registerConsumerHandler( + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, getAckFromROB_, InstPtr)); + + in_reorder_flush_.registerConsumerHandler( + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, handleFlush_, FlushManager::FlushingCriteria)); + + in_mmu_lookup_req_.registerConsumerHandler( + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, handleMMUReadyReq_, MemoryAccessInfoPtr)); + + in_mmu_lookup_ack_.registerConsumerHandler( + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, getAckFromMMU_, MemoryAccessInfoPtr)); + + in_cache_lookup_req_.registerConsumerHandler( + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, handleCacheReadyReq_, MemoryAccessInfoPtr)); + + in_cache_lookup_ack_.registerConsumerHandler( + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, getAckFromCache_, MemoryAccessInfoPtr)); + + // Allow the pipeline to create events and schedule work + ldst_pipeline_.performOwnUpdates(); + + // There can be situations where NOTHING is going on in the + // simulator but forward progression of the pipeline elements. + // In this case, the internal event for the LS pipeline will + // be the only event keeping simulation alive. Sparta + // supports identifying non-essential events (by calling + // setContinuing to false on any event). 
+ ldst_pipeline_.setContinuing(true); + + ldst_pipeline_.registerHandlerAtStage( + address_calculation_stage_, CREATE_SPARTA_HANDLER(VLSU, handleAddressCalculation_)); + + ldst_pipeline_.registerHandlerAtStage(mmu_lookup_stage_, + CREATE_SPARTA_HANDLER(VLSU, handleMMULookupReq_)); + + ldst_pipeline_.registerHandlerAtStage(cache_lookup_stage_, + CREATE_SPARTA_HANDLER(VLSU, handleCacheLookupReq_)); + + ldst_pipeline_.registerHandlerAtStage(cache_read_stage_, + CREATE_SPARTA_HANDLER(VLSU, handleCacheRead_)); + + ldst_pipeline_.registerHandlerAtStage(complete_stage_, + CREATE_SPARTA_HANDLER(VLSU, completeInst_)); + + // Capture when the simulation is stopped prematurely by the ROB i.e. hitting retire limit + node->getParent()->registerForNotification( + this, "rob_stopped_notif_channel", false /* ROB maybe not be constructed yet */); + + uev_append_ready_ >> uev_issue_inst_; + // NOTE: + // To resolve the race condition when: + // Both cache and MMU try to drive the single BIU port at the same cycle + // Here we give cache the higher priority + ILOG("VLSU construct: #" << node->getGroupIdx()); + } + + VLSU::~VLSU() + { + DLOG(getContainer()->getLocation() << ": " << load_store_info_allocator_.getNumAllocated() + << " LoadStoreInstInfo objects allocated/created"); + DLOG(getContainer()->getLocation() << ": " << memory_access_allocator_.getNumAllocated() + << " MemoryAccessInfo objects allocated/created"); + } + + void VLSU::onROBTerminate_(const bool & val) { rob_stopped_simulation_ = val; } + + void VLSU::onStartingTeardown_() + { + // If ROB has not stopped the simulation & + // the ldst has entries to process we should fail + if ((false == rob_stopped_simulation_) && (false == mem_request_queue_.empty())) + { + dumpDebugContent_(std::cerr); + sparta_assert(false, "Issue queue has pending instructions"); + } + } + + //////////////////////////////////////////////////////////////////////////////// + // Callbacks + //////////////////////////////////////////////////////////////////////////////// + + // Send initial credits (inst_queue_size_) to Dispatch Unit + void VLSU::sendInitialCredits_() + { + setupScoreboard_(); + out_vlsu_credits_.send(inst_queue_size_); + + ILOG("VLSU initial credits for Dispatch Unit: " << inst_queue_size_); + } + + // Setup scoreboard View + void VLSU::setupScoreboard_() + { + // Setup scoreboard view upon register file + // if we ever move to multicore, we only want to have resources look for scoreboard in their + // cpu if we're running a test where we only have top.rename or top.issue_queue, then we can + // just use the root + auto cpu_node = getContainer()->findAncestorByName("core.*"); + if (cpu_node == nullptr) + { + cpu_node = getContainer()->getRoot(); + } + for (uint32_t rf = 0; rf < core_types::RegFile::N_REGFILES; + ++rf) // for (const auto rf : reg_files) + { + scoreboard_views_[rf].reset(new sparta::ScoreboardView( + getContainer()->getName(), core_types::regfile_names[rf], cpu_node)); + } + } + + // Receive new load/store instruction from Dispatch Unit + void VLSU::getInstsFromDispatch_(const InstPtr & inst_ptr) + { + ILOG("New instruction added to the ldst queue " << inst_ptr); + sparta_assert(inst_queue_.size() < inst_queue_size_, + "More instructions appended to inst queue then allowed!"); + inst_queue_.push(inst_ptr); + memRequestGenerator_(); + vlsu_insts_dispatched_++; + } + + void VLSU::memRequestGenerator_() + { + const InstPtr & inst_ptr = inst_queue_.read(0); + uint32_t width = data_width_ < inst_ptr->getEEW() ? 
data_width_ : inst_ptr->getEEW(); + // Set total number of vector iterations + uint32_t total_number_iterations = inst_ptr->getVL() / width; + inst_ptr->setTotalVLSUIters(total_number_iterations); + // create N memory request objects, push them down mem_request_queue_ + // if not enough space, break and wait until space opens up in mem_request_queue_ + for (uint32_t i = inst_ptr->getCurrVLSUIters(); i < total_number_iterations; ++i) + { + + if (mem_request_queue_.size() < mem_request_queue_size_) + { + // TODO: Address Unroller Class + sparta::memory::addr_t addr = inst_ptr->getTargetVAddr(); + // Need to modify for indexed load/stores + inst_ptr->setTargetVAddr(addr + inst_ptr->getStride()); + LoadStoreInstInfoPtr load_store_info_ptr = createLoadStoreInst_(inst_ptr); + load_store_info_ptr->getMemoryAccessInfoPtr()->setVAddr(inst_ptr->getTargetVAddr()); + const LoadStoreInstIterator & iter = + mem_request_queue_.push_back(load_store_info_ptr); + load_store_info_ptr->setIssueQueueIterator(iter); + uint32_t vector_iter = inst_ptr->getCurrVLSUIters(); + // setting current vlsu iteration + inst_ptr->setCurrVLSUIters(++vector_iter); + load_store_info_ptr->setVLSUStatusState(Inst::Status::DISPATCHED); + handleOperandIssueCheck_(load_store_info_ptr); + ILOG("Generating request: " + << i << " of " << total_number_iterations << " for instruction: " << inst_ptr + << " with vaddr of: " + << load_store_info_ptr->getMemoryAccessInfoPtr()->getVAddr()); + if (i == (total_number_iterations - 1)) + { + load_store_info_ptr->setIsLastMemOp(true); + ILOG("Setting vaddr: " + << load_store_info_ptr->getMemoryAccessInfoPtr()->getVAddr() + << " as last mem op") + } + } + else + { + ILOG("Not enough space in mem_request_queue_") + // not enough space in mem_request_queue_ + break; + } + } + } + + // Callback from Scoreboard to inform Operand Readiness + void VLSU::handleOperandIssueCheck_(const LoadStoreInstInfoPtr & load_store_info_ptr) + { + const InstPtr & inst_ptr = load_store_info_ptr->getInstPtr(); + if (load_store_info_ptr->getVLSUStatusState() == Inst::Status::SCHEDULED) + { + ILOG("Instruction was previously ready " << inst_ptr); + return; + } + + bool all_ready = true; // assume all ready + // address operand check + if (!instOperandReady_(inst_ptr)) + { + all_ready = false; + const auto & src_bits = inst_ptr->getSrcRegisterBitMask(core_types::RF_INTEGER); + scoreboard_views_[core_types::RF_INTEGER]->registerReadyCallback( + src_bits, load_store_info_ptr->getInstPtr()->getUniqueID(), + [this, load_store_info_ptr](const sparta::Scoreboard::RegisterBitMask &) + { this->handleOperandIssueCheck_(load_store_info_ptr); }); + ILOG("Instruction NOT ready: " << inst_ptr << " Address Bits needed:" + << sparta::printBitSet(src_bits)); + } + else + { + // we wait for address operand to be ready before checking data operand in the case of + // stores this way we avoid two live callbacks + if (inst_ptr->isStoreInst()) + { + const auto rf = inst_ptr->getRenameData().getDataReg().rf; + const auto & data_bits = inst_ptr->getDataRegisterBitMask(rf); + // if x0 is a data operand, we don't need to check scoreboard + if (!inst_ptr->getRenameData().getDataReg().is_x0) + { + if (!scoreboard_views_[rf]->isSet(data_bits)) + { + all_ready = false; + scoreboard_views_[rf]->registerReadyCallback( + data_bits, load_store_info_ptr->getInstPtr()->getUniqueID(), + [this, load_store_info_ptr](const sparta::Scoreboard::RegisterBitMask &) + { this->handleOperandIssueCheck_(load_store_info_ptr); }); + ILOG("Instruction NOT ready: " << 
inst_ptr << " Bits needed:" + << sparta::printBitSet(data_bits)); + } + } + } + else if (false == allow_speculative_load_exec_) + { + // It's a load + // A load instruction is ready when both its address and the addresses of + // older stores are known + all_ready = allOlderStoresIssued_(inst_ptr); + } + } + // Loads are ready when their operands are ready + // Stores are ready when both their operands and data are ready + // If speculative loads are allowed, older stores are not checked for a physical address + if (all_ready) + { + // Update issue priority & Schedule an instruction issue event + updateIssuePriorityAfterNewDispatch_(load_store_info_ptr); + + appendToReadyQueue_(load_store_info_ptr); + + // NOTE: + // It is a bug if instruction status is updated as SCHEDULED in issueInst_() + // The reason is: when issueInst_() is called, it could be scheduled for + // either a new issue event, or a re-issue event + // However, we can ONLY update instruction status as SCHEDULED for a new issue event + + ILOG("Another issue event scheduled " + << inst_ptr << " " << load_store_info_ptr->getMemoryAccessInfoPtr()->getVAddr()); + + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + } + + // Receive update from ROB whenever store instructions retire + void VLSU::getAckFromROB_(const InstPtr & inst_ptr) + { + sparta_assert(inst_ptr->getStatus() == Inst::Status::RETIRED, + "Get ROB Ack, but the store inst hasn't retired yet!"); + + if (inst_ptr->isVector()) + { + ++stores_retired_; + + // updateIssuePriorityAfterStoreInstRetire_(inst_ptr); + if (isReadyToIssueInsts_()) + { + ILOG("ROB Ack issue"); + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + + ILOG("ROB Ack: Retired store instruction: " << inst_ptr); + } + } + + // Issue/Re-issue ready instructions in the issue queue + void VLSU::issueInst_() + { + // Instruction issue arbitration + const LoadStoreInstInfoPtr win_ptr = arbitrateInstIssue_(); + // NOTE: + // win_ptr should always point to an instruction ready to be issued + // Otherwise assertion error should already be fired in arbitrateInstIssue_() + ++VLSU_insts_issued_; + // Append load/store pipe + ILOG("Appending to ldst_pipeline: " << win_ptr->getMemoryAccessInfoPtr()->getVAddr()) + ldst_pipeline_.append(win_ptr); + // We append to the replay queue to prevent the ref count of the shared pointer from dropping + // before calling pop below + if (allow_speculative_load_exec_) + { + ILOG("Appending to replay queue " << win_ptr); + appendToReplayQueue_(win_ptr); + } + + // Remove inst from ready queue + win_ptr->setInReadyQueue(false); + + // Update instruction issue info + win_ptr->setState(LoadStoreInstInfo::IssueState::ISSUED); + win_ptr->setPriority(LoadStoreInstInfo::IssuePriority::LOWEST); + + // Schedule another instruction issue event if possible + if (isReadyToIssueInsts_()) + { + ILOG("IssueInst_ issue"); + uev_issue_inst_.schedule(sparta::Clock::Cycle(1)); + } + } + + void VLSU::handleAddressCalculation_() + { + auto stage_id = address_calculation_stage_; + + if (!ldst_pipeline_.isValid(stage_id)) + { + return; + } + + auto & ldst_info_ptr = ldst_pipeline_[stage_id]; + auto & inst_ptr = ldst_info_ptr->getInstPtr(); + // Assume the address is calculated in this stage + + ILOG("Address Generation " << inst_ptr << ldst_info_ptr); + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + + //////////////////////////////////////////////////////////////////////////////// + // MMU subroutines + 
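+ // The handlers below send translation requests to the MMU and react to its + // ready/ack callbacks.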
//////////////////////////////////////////////////////////////////////////////// + // Handle MMU access request + void VLSU::handleMMULookupReq_() + { + // Check if flushing event occurred just now + if (!ldst_pipeline_.isValid(mmu_lookup_stage_)) + { + return; + } + + const LoadStoreInstInfoPtr & load_store_info_ptr = ldst_pipeline_[mmu_lookup_stage_]; + const MemoryAccessInfoPtr & mem_access_info_ptr = + load_store_info_ptr->getMemoryAccessInfoPtr(); + + const InstPtr & inst_ptr = load_store_info_ptr->getInstPtr(); + + const bool mmu_bypass = + (mem_access_info_ptr->getMMUState() == MemoryAccessInfo::MMUState::HIT); + + if (mmu_bypass) + { + ILOG("MMU Lookup is skipped (TLB is already hit)! " << load_store_info_ptr); + return; + } + + // Ready dependent younger loads + if (false == allow_speculative_load_exec_) + { + if (inst_ptr->isStoreInst()) + { + readyDependentLoads_(load_store_info_ptr); + } + } + + out_mmu_lookup_req_.send(mem_access_info_ptr); + ILOG(mem_access_info_ptr << load_store_info_ptr << mem_access_info_ptr->getVAddr()); + } + + void VLSU::getAckFromMMU_(const MemoryAccessInfoPtr & updated_memory_access_info_ptr) + { + const auto stage_id = mmu_lookup_stage_; + + // Check if flushing event occurred just now + if (!ldst_pipeline_.isValid(stage_id)) + { + ILOG("MMU stage not valid"); + return; + } + ILOG("MMU Ack: " << std::boolalpha << updated_memory_access_info_ptr->getPhyAddrStatus() + << " " << updated_memory_access_info_ptr); + const bool mmu_hit_ = updated_memory_access_info_ptr->getPhyAddrStatus(); + + if (updated_memory_access_info_ptr->getInstPtr()->isStoreInst() && mmu_hit_ + && allow_speculative_load_exec_) + { + ILOG("Aborting speculative loads " << updated_memory_access_info_ptr); + abortYoungerLoads_(updated_memory_access_info_ptr); + } + } + + void VLSU::handleMMUReadyReq_(const MemoryAccessInfoPtr & memory_access_info_ptr) + { + ILOG("MMU rehandling event is scheduled! " << memory_access_info_ptr); + const auto & inst_ptr = memory_access_info_ptr->getInstPtr(); + + // Update issue priority & Schedule an instruction (re-)issue event + updateIssuePriorityAfterTLBReload_(memory_access_info_ptr); + + if (inst_ptr->getFlushedStatus()) + { + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + return; + } + + removeInstFromReplayQueue_(inst_ptr); + + if (isReadyToIssueInsts_()) + { + ILOG("MMU ready issue"); + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + + //////////////////////////////////////////////////////////////////////////////// + // Cache Subroutine + //////////////////////////////////////////////////////////////////////////////// + // Handle cache access request + void VLSU::handleCacheLookupReq_() + { + // Check if flushing event occurred just now + if (!ldst_pipeline_.isValid(cache_lookup_stage_)) + { + return; + } + + const LoadStoreInstInfoPtr & load_store_info_ptr = ldst_pipeline_[cache_lookup_stage_]; + ILOG(load_store_info_ptr->getMemoryAccessInfoPtr()->getVAddr()) + const MemoryAccessInfoPtr & mem_access_info_ptr = + load_store_info_ptr->getMemoryAccessInfoPtr(); + const bool phy_addr_is_ready = mem_access_info_ptr->getPhyAddrStatus(); + + // If we did not have an MMU hit from previous stage, invalidate and bail + if (false == phy_addr_is_ready) + { + ILOG("Cache Lookup is skipped (Physical address not ready)!" 
<< load_store_info_ptr); + if (allow_speculative_load_exec_) + { + updateInstReplayReady_(load_store_info_ptr); + } + // There might not be a wake-up because the cache cannot handle any more instructions + // Change to a NACK wake-up when implemented + if (!load_store_info_ptr->isInReadyQueue()) + { + appendToReadyQueue_(load_store_info_ptr); + load_store_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + ldst_pipeline_.invalidateStage(cache_lookup_stage_); + return; + } + + const InstPtr & inst_ptr = mem_access_info_ptr->getInstPtr(); + ILOG(load_store_info_ptr << " " << mem_access_info_ptr << " " + << load_store_info_ptr->getVLSUStatusState()); + + // If the instruction has passed translation and is a store, + // then it's good to be retired (i.e. mark it completed). + // Stores typically do not cause a flush after a successful + // translation. We now wait for the Retire block to "retire" + // it, meaning it's good to go to the cache + if (inst_ptr->isStoreInst() + && (load_store_info_ptr->getVLSUStatusState() == Inst::Status::SCHEDULED)) + { + ILOG("Store marked as completed " << inst_ptr); + load_store_info_ptr->setVLSUStatusState(Inst::Status::RETIRED); + load_store_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + ldst_pipeline_.invalidateStage(cache_lookup_stage_); + updateIssuePriorityAfterStoreInstRetire_(load_store_info_ptr); + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(1)); + } + if (allow_speculative_load_exec_) + { + updateInstReplayReady_(load_store_info_ptr); + } + return; + } + + // Loads don't perform a cache lookup if there are older stores present in the load/store + // queue + if (!inst_ptr->isStoreInst() && olderStoresExists_(inst_ptr) + && allow_speculative_load_exec_) + { + ILOG("Dropping speculative load " << inst_ptr); + load_store_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + ldst_pipeline_.invalidateStage(cache_lookup_stage_); + if (allow_speculative_load_exec_) + { + updateInstReplayReady_(load_store_info_ptr); + } + return; + } + + const bool is_already_hit = + (mem_access_info_ptr->getCacheState() == MemoryAccessInfo::CacheState::HIT); + const bool is_unretired_store = + inst_ptr->isStoreInst() + && (load_store_info_ptr->getVLSUStatusState() != Inst::Status::RETIRED); + const bool cache_bypass = is_already_hit || !phy_addr_is_ready || is_unretired_store; + + if (cache_bypass) + { + if (is_already_hit) + { + ILOG("Cache Lookup is skipped (Cache already hit)"); + } + else if (is_unretired_store) + { + ILOG("Cache Lookup is skipped (store instruction not oldest)"); + } + else + { + sparta_assert(false, "Cache access is bypassed without a valid reason!"); + } + return; + } + + out_cache_lookup_req_.send(mem_access_info_ptr); + } + + void VLSU::getAckFromCache_(const MemoryAccessInfoPtr & mem_access_info_ptr) + { + const LoadStoreInstIterator & iter = mem_access_info_ptr->getIssueQueueIterator(); + if (!iter.isValid()) + { + return; + } + + // If it's a cache miss we don't need to reschedule the instruction + if (!mem_access_info_ptr->isCacheHit()) + { + return; + } + + const LoadStoreInstInfoPtr & inst_info_ptr = *(iter); + + // Update issue priority for this outstanding cache miss + if (inst_info_ptr->getState() != LoadStoreInstInfo::IssueState::ISSUED) + { + inst_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + } + + inst_info_ptr->setPriority(LoadStoreInstInfo::IssuePriority::CACHE_RELOAD); + 
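+ // Only schedule an append if the entry is not already waiting in the ready queue; + // the appendReady_ handler will re-insert it for (re-)issue.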
if (!inst_info_ptr->isInReadyQueue()) + { + uev_append_ready_.preparePayload(inst_info_ptr)->schedule(sparta::Clock::Cycle(0)); + } + } + + void VLSU::handleCacheReadyReq_(const MemoryAccessInfoPtr & memory_access_info_ptr) + { + auto inst_ptr = memory_access_info_ptr->getInstPtr(); + if (inst_ptr->getFlushedStatus()) + { + ILOG("BIU Ack for a flushed cache miss is received!"); + + // Schedule an instruction (re-)issue event + // Note: some younger load/store instruction(s) might have been blocked by + // this outstanding miss + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + + return; + } + + ILOG("Cache ready for " << memory_access_info_ptr); + updateIssuePriorityAfterCacheReload_(memory_access_info_ptr); + removeInstFromReplayQueue_(inst_ptr); + + if (isReadyToIssueInsts_()) + { + ILOG("Cache ready issue"); + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + + void VLSU::handleCacheRead_() + { + // Check if flushing event occurred just now + if (!ldst_pipeline_.isValid(cache_read_stage_)) + { + return; + } + + const LoadStoreInstInfoPtr & load_store_info_ptr = ldst_pipeline_[cache_read_stage_]; + const MemoryAccessInfoPtr & mem_access_info_ptr = + load_store_info_ptr->getMemoryAccessInfoPtr(); + + if (false == mem_access_info_ptr->isCacheHit()) + { + ILOG(mem_access_info_ptr->getCacheState()) + ILOG("Cannot complete inst, cache miss: " << mem_access_info_ptr); + if (allow_speculative_load_exec_) + { + updateInstReplayReady_(load_store_info_ptr); + } + // There might not be a wake-up because the cache cannot handle any more instructions + // Change to a NACK wake-up when implemented + if (!load_store_info_ptr->isInReadyQueue()) + { + ILOG("Appending to ready queue " << load_store_info_ptr->getInstPtr()) + appendToReadyQueue_(load_store_info_ptr); + load_store_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + ldst_pipeline_.invalidateStage(cache_read_stage_); + return; + } + + if (mem_access_info_ptr->isDataReady()) + { + ILOG("Instruction already had its data ready"); + return; + } + + ILOG("Data ready set for " << mem_access_info_ptr); + mem_access_info_ptr->setDataReady(true); + + if (isReadyToIssueInsts_()) + { + ILOG("Cache read issue"); + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + + // Retire load/store instruction + void VLSU::completeInst_() + { + // For VLSU, the condition for completing an instruction + // is that all memory requests are done.
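+ // (i.e. the request flagged isLastMemOp() has reached this stage and all generated + // iterations have been drained)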
+ // Once done, we then pop it from the inst_queue as well and send it to the ROB for retiring + + // Check if flushing event occurred just now + if (!ldst_pipeline_.isValid(complete_stage_)) + { + return; + } + const LoadStoreInstInfoPtr & load_store_info_ptr = ldst_pipeline_[complete_stage_]; + uint32_t total_iters = load_store_info_ptr->getInstPtr()->getTotalVLSUIters(); + // we're done load/storing all vector bits, can complete + const MemoryAccessInfoPtr & mem_access_info_ptr = + load_store_info_ptr->getMemoryAccessInfoPtr(); + const InstPtr & inst_ptr = mem_access_info_ptr->getInstPtr(); + if (false == mem_access_info_ptr->isDataReady()) + { + ILOG("Cannot complete inst, cache data is missing: " << mem_access_info_ptr); + return; + } + else + { + // Don't complete inst until we get the last memory request + // For stores, we have to wait for handleCacheLookupReq_ to mark as RETIRED + // For loads we don't wait for that to process it, so we don't gate on that condition + if (inst_ptr->getCurrVLSUIters() >= total_iters && load_store_info_ptr->isLastMemOp() + && (load_store_info_ptr->getVLSUStatusState() == Inst::Status::RETIRED + || !inst_ptr->isStoreInst())) + { + const bool is_store_inst = inst_ptr->isStoreInst(); + ILOG("Completing inst: " << inst_ptr); + inst_queue_.pop(); // pop inst_ptr + if (inst_queue_.size() > 0) + { + uev_gen_mem_ops_.schedule(sparta::Clock::Cycle(0)); + } + + core_types::RegFile reg_file = core_types::RF_INTEGER; + const auto & dests = inst_ptr->getDestOpInfoList(); + if (dests.size() > 0) + { + sparta_assert(dests.size() == 1); // we should only have one destination + reg_file = olympia::coreutils::determineRegisterFile(dests[0]); + const auto & dest_bits = inst_ptr->getDestRegisterBitMask(reg_file); + scoreboard_views_[reg_file]->setReady(dest_bits); + } + + // Complete load instruction + if (!is_store_inst) + { + sparta_assert(mem_access_info_ptr->getCacheState() + == MemoryAccessInfo::CacheState::HIT, + "Load instruction cannot complete when cache is still a miss! " + << mem_access_info_ptr); + + if (isReadyToIssueInsts_()) + { + ILOG("Complete issue"); + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + if (load_store_info_ptr->isRetired() + || load_store_info_ptr->getVLSUStatusState() == Inst::Status::COMPLETED) + { + ILOG("Load was previously completed or retired " << load_store_info_ptr); + if (allow_speculative_load_exec_) + { + ILOG("Removed replay " << inst_ptr); + removeInstFromReplayQueue_(load_store_info_ptr); + } + return; + } + + // Mark instruction as completed + inst_ptr->setStatus(Inst::Status::COMPLETED); + // Remove completed instruction from queues + ILOG("Removed issue queue " << inst_ptr); + popIssueQueue_(load_store_info_ptr); + if (allow_speculative_load_exec_) + { + ILOG("Removed replay " << inst_ptr); + removeInstFromReplayQueue_(load_store_info_ptr); + } + + VLSU_insts_completed_++; + out_vlsu_credits_.send(1, 0); + + ILOG("Complete Load Instruction: " << inst_ptr->getMnemonic() << " uid(" + << inst_ptr->getUniqueID() << ")"); + + return; + } + + sparta_assert(mem_access_info_ptr->getCacheState() + == MemoryAccessInfo::CacheState::HIT, + "Store inst cannot finish when cache is still a miss! " << inst_ptr); + + sparta_assert(mem_access_info_ptr->getMMUState() == MemoryAccessInfo::MMUState::HIT, + "Store inst cannot finish when MMU is still a miss! 
" << inst_ptr); + inst_ptr->setStatus(Inst::Status::COMPLETED); + if (isReadyToIssueInsts_()) + { + ILOG("Complete store issue"); + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + + if (!load_store_info_ptr->getIssueQueueIterator().isValid()) + { + ILOG("Inst was already retired " << load_store_info_ptr); + if (allow_speculative_load_exec_) + { + ILOG("Removed replay " << load_store_info_ptr); + removeInstFromReplayQueue_(load_store_info_ptr); + } + return; + } + + ILOG("Removed issue queue " << inst_ptr); + popIssueQueue_(load_store_info_ptr); + + if (allow_speculative_load_exec_) + { + ILOG("Removed replay " << load_store_info_ptr); + removeInstFromReplayQueue_(load_store_info_ptr); + } + + VLSU_insts_completed_++; + out_vlsu_credits_.send(1, 0); + + ILOG("Store operation is done!"); + + // NOTE: + // Checking whether an instruction is ready to complete could be non-trivial + // Right now we simply assume: + // (1)Load inst is ready to complete as long as both MMU and cache access finish + // (2)Store inst is ready to complete as long as MMU (address translation) is done + } + else + { + ILOG("Not all mem requests for " + << inst_ptr << " are done yet " + << " currently waiting on: " << inst_ptr->getCurrVLSUIters() << " of " + << total_iters) + if (allow_speculative_load_exec_) + { + removeInstFromReplayQueue_(load_store_info_ptr); + } + if (load_store_info_ptr->getIssueQueueIterator().isValid()) + { + popIssueQueue_(load_store_info_ptr); + } + if (inst_ptr->getCurrVLSUIters() < inst_ptr->getTotalVLSUIters()) + { + // not done generating all memops + uev_gen_mem_ops_.schedule(sparta::Clock::Cycle(0)); + } + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + } + } + + // Handle instruction flush in VLSU + void VLSU::handleFlush_(const FlushCriteria & criteria) + { + ILOG("Start Flushing!"); + + VLSU_flushes_++; + + // Flush load/store pipeline entry + flushLSPipeline_(criteria); + + // Flush instruction issue queue + flushIssueQueue_(criteria); + flushReplayBuffer_(criteria); + flushReadyQueue_(criteria); + + // Cancel replay events + auto flush = [&criteria](const LoadStoreInstInfoPtr & ldst_info_ptr) -> bool + { return criteria.includedInFlush(ldst_info_ptr->getInstPtr()); }; + uev_append_ready_.cancelIf(flush); + uev_replay_ready_.cancelIf(flush); + + // Cancel issue event already scheduled if no ready-to-issue inst left after flush + if (!isReadyToIssueInsts_()) + { + uev_issue_inst_.cancel(); + } + + // NOTE: + // Flush is handled at Flush phase (inbetween PortUpdate phase and Tick phase). + // This also guarantees that whenever an instruction issue event happens, + // instruction issue arbitration should always succeed, even when flush happens. + // Otherwise, assertion error is fired inside arbitrateInstIssue_() + } + + void VLSU::dumpDebugContent_(std::ostream & output) const + { + output << "VLSU Contents" << std::endl; + for (const auto & entry : mem_request_queue_) + { + output << '\t' << entry << " " << entry->getMemoryAccessInfoPtr()->getVAddr() + << std::endl; + } + } + + void VLSU::replayReady_(const LoadStoreInstInfoPtr & replay_inst_ptr) + { + ILOG("Replay inst ready " << replay_inst_ptr); + // We check in the ldst_queue as the instruction may not be in the replay queue + if (replay_inst_ptr->getState() == LoadStoreInstInfo::IssueState::NOT_READY) + { + replay_inst_ptr->setState(LoadStoreInstInfo::IssueState::READY); + } + auto issue_priority = replay_inst_ptr->getMemoryAccessInfoPtr()->getPhyAddrStatus() + ? 
LoadStoreInstInfo::IssuePriority::CACHE_PENDING + : LoadStoreInstInfo::IssuePriority::MMU_PENDING; + replay_inst_ptr->setPriority(issue_priority); + uev_append_ready_.preparePayload(replay_inst_ptr)->schedule(sparta::Clock::Cycle(0)); + + if (isReadyToIssueInsts_()) + { + ILOG("replay ready issue"); + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + + void VLSU::updateInstReplayReady_(const LoadStoreInstInfoPtr & load_store_info_ptr) + { + ILOG("Scheduled replay " << load_store_info_ptr << " after " << replay_issue_delay_ + << " cycles"); + load_store_info_ptr->setState(LoadStoreInstInfo::IssueState::NOT_READY); + uev_replay_ready_.preparePayload(load_store_info_ptr) + ->schedule(sparta::Clock::Cycle(replay_issue_delay_)); + removeInstFromReplayQueue_(load_store_info_ptr); + + replay_insts_++; + } + + void VLSU::appendReady_(const LoadStoreInstInfoPtr & replay_inst_ptr) + { + ILOG("Appending to Ready ready queue event " << replay_inst_ptr->isInReadyQueue() << " " + << replay_inst_ptr); + if (!replay_inst_ptr->isInReadyQueue() + && !replay_inst_ptr->getReplayQueueIterator().isValid()) + appendToReadyQueue_(replay_inst_ptr); + if (isReadyToIssueInsts_()) + { + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + + //////////////////////////////////////////////////////////////////////////////// + // Regular Function/Subroutine Call + //////////////////////////////////////////////////////////////////////////////// + VLSU::LoadStoreInstInfoPtr VLSU::createLoadStoreInst_(const InstPtr & inst_ptr) + { + // Create load/store memory access info + MemoryAccessInfoPtr mem_info_ptr = sparta::allocate_sparta_shared_pointer( + memory_access_allocator_, inst_ptr); + // Create load/store instruction issue info + LoadStoreInstInfoPtr inst_info_ptr = + sparta::allocate_sparta_shared_pointer(load_store_info_allocator_, + mem_info_ptr); + return inst_info_ptr; + } + + void VLSU::allocateInstToIssueQueue_(const InstPtr & inst_ptr) + { + auto inst_info_ptr = createLoadStoreInst_(inst_ptr); + + sparta_assert(mem_request_queue_.size() < mem_request_queue_size_, + "Appending issue queue causes overflows!"); + + // Always append newly dispatched instructions to the back of issue queue + const LoadStoreInstIterator & iter = mem_request_queue_.push_back(inst_info_ptr); + inst_info_ptr->setIssueQueueIterator(iter); + ILOG("Append new load/store instruction to issue queue!"); + } + + bool VLSU::allOlderStoresIssued_(const InstPtr & inst_ptr) + { + for (const auto & ldst_info_ptr : mem_request_queue_) + { + const auto & ldst_inst_ptr = ldst_info_ptr->getInstPtr(); + const auto & mem_info_ptr = ldst_info_ptr->getMemoryAccessInfoPtr(); + if (ldst_inst_ptr->isStoreInst() + && ldst_inst_ptr->getUniqueID() < inst_ptr->getUniqueID() + && !mem_info_ptr->getPhyAddrStatus() && ldst_info_ptr->getInstPtr() != inst_ptr + && ldst_inst_ptr->getUOpID() < inst_ptr->getUOpID()) + { + return false; + } + } + return true; + } + + // Only called if allow_spec_load_exec is true + void VLSU::readyDependentLoads_(const LoadStoreInstInfoPtr & store_inst_ptr) + { + bool found = false; + for (auto & ldst_inst_ptr : mem_request_queue_) + { + auto & inst_ptr = ldst_inst_ptr->getInstPtr(); + if (inst_ptr->isStoreInst()) + { + continue; + } + + // Only ready loads which have register operands ready + // We only care of the instructions which are still not ready + // Instruction have a status of SCHEDULED if they are ready to be issued + if (store_inst_ptr->getVLSUStatusState() == Inst::Status::DISPATCHED + && 
instOperandReady_(inst_ptr)) + { + ILOG("Updating inst to schedule " << inst_ptr << " " << ldst_inst_ptr); + updateIssuePriorityAfterNewDispatch_(store_inst_ptr); + appendToReadyQueue_(ldst_inst_ptr); + found = true; + } + } + + if (found && isReadyToIssueInsts_()) + { + ILOG("Ready dep inst issue "); + uev_issue_inst_.schedule(sparta::Clock::Cycle(0)); + } + } + + bool VLSU::instOperandReady_(const InstPtr & inst_ptr) + { + return scoreboard_views_[core_types::RF_INTEGER]->isSet( + inst_ptr->getSrcRegisterBitMask(core_types::RF_INTEGER)); + } + + void VLSU::abortYoungerLoads_(const olympia::MemoryAccessInfoPtr & memory_access_info_ptr) + { + auto & inst_ptr = memory_access_info_ptr->getInstPtr(); + uint64_t min_inst_age = UINT64_MAX; + // Find oldest instruction age with the same Virtual address + for (auto iter = replay_buffer_.begin(); iter != replay_buffer_.end(); iter++) + { + auto & queue_inst = (*iter)->getInstPtr(); + // Skip stores or the instruction being compared against + if (queue_inst->isStoreInst() || queue_inst == inst_ptr) + { + continue; + } + // Find loads which have the same address + // Record the oldest age to abort instructions younger than it + if (queue_inst->getTargetVAddr() == inst_ptr->getTargetVAddr() + && queue_inst->getUniqueID() < min_inst_age) + { + min_inst_age = queue_inst->getUniqueID(); + } + } + + if (min_inst_age == UINT64_MAX) + { + ILOG("No younger instruction to deallocate"); + return; + } + + ILOG("Age of the oldest instruction " << min_inst_age << " for " << inst_ptr + << inst_ptr->getTargetVAddr()); + + // Remove instructions younger than the oldest load that was removed + auto iter = replay_buffer_.begin(); + while (iter != replay_buffer_.end()) + { + auto replay_inst_iter(iter++); + auto & replay_inst = *replay_inst_iter; + // Apply to loads only + if (replay_inst->getInstPtr()->isStoreInst()) + { + continue; + } + + if (replay_inst->getInstUniqueID() >= min_inst_age) + { + (replay_inst)->setState(LoadStoreInstInfo::IssueState::READY); + appendToReadyQueue_(replay_inst); + + ILOG("Aborted younger load " + << replay_inst << replay_inst->getInstPtr()->getTargetVAddr() << inst_ptr); + dropInstFromPipeline_(replay_inst); + removeInstFromReplayQueue_(replay_inst); + } + } + } + + // Drop instruction from the pipeline + // Pipeline stages might be multi cycle hence we have check all the stages + void VLSU::dropInstFromPipeline_(const LoadStoreInstInfoPtr & load_store_inst_info_ptr) + { + ILOG("Dropping instruction from pipeline " << load_store_inst_info_ptr); + + for (int stage = 0; stage <= complete_stage_; stage++) + { + if (ldst_pipeline_.isValid(stage)) + { + const auto & pipeline_inst = ldst_pipeline_[stage]; + if (pipeline_inst == load_store_inst_info_ptr) + { + ldst_pipeline_.invalidateStage(stage); + return; + } + } + } + } + + void VLSU::removeInstFromReplayQueue_(const InstPtr & inst_to_remove) + { + ILOG("Removing Inst from replay queue " << inst_to_remove); + for (const auto & ldst_inst : mem_request_queue_) + { + if (ldst_inst->getInstPtr() == inst_to_remove) + { + if (ldst_inst->getReplayQueueIterator().isValid()) + { + removeInstFromReplayQueue_(ldst_inst); + } + else + { + // Handle situations when replay delay completes before mmu/cache is ready + ILOG("Invalid Replay queue entry " << inst_to_remove); + } + } + } + } + + void VLSU::removeInstFromReplayQueue_(const LoadStoreInstInfoPtr & inst_to_remove) + { + ILOG("Removing Inst from replay queue " << inst_to_remove); + if (inst_to_remove->getReplayQueueIterator().isValid()) + 
replay_buffer_.erase(inst_to_remove->getReplayQueueIterator()); + // Invalidate the iterator manually + inst_to_remove->setReplayQueueIterator(LoadStoreInstIterator()); + } + + // Pop completed load/store instruction out of issue queue + void VLSU::popIssueQueue_(const LoadStoreInstInfoPtr & inst_ptr) + { + ILOG("Removing Inst from issue queue " << inst_ptr); + mem_request_queue_.erase(inst_ptr->getIssueQueueIterator()); + // Invalidate the iterator manually + inst_ptr->setIssueQueueIterator(LoadStoreInstIterator()); + } + + void VLSU::appendToReplayQueue_(const LoadStoreInstInfoPtr & inst_info_ptr) + { + sparta_assert(replay_buffer_.size() < replay_buffer_size_, + "Appending load queue causes overflows!"); + // Always append newly dispatched instructions to the back of issue queue + const auto & iter = replay_buffer_.push_back(inst_info_ptr); + inst_info_ptr->setReplayQueueIterator(iter); + + ILOG("Append new instruction to replay queue!" << inst_info_ptr); + } + + void VLSU::appendToReadyQueue_(const LoadStoreInstInfoPtr & ldst_inst_ptr) + { + for (const auto & inst : mem_request_queue_) + { + if (ldst_inst_ptr->getMemoryAccessInfoPtr()->getVAddr() + == inst->getMemoryAccessInfoPtr()->getVAddr() + && ldst_inst_ptr->getInstPtr() == inst->getInstPtr()) + { + ILOG("Appending to Ready queue " << ldst_inst_ptr); + // appendToReadyQueue_(inst); + ready_queue_.insert(ldst_inst_ptr); + ldst_inst_ptr->setInReadyQueue(true); + ldst_inst_ptr->setState(LoadStoreInstInfo::IssueState::READY); + return; + } + } + sparta_assert(false, "Instruction not found in the issue queue " << ldst_inst_ptr); + // for (const auto & inst : ready_queue_) + // { + // sparta_assert(inst != ldst_inst_ptr, "Instruction in ready queue " << ldst_inst_ptr); + // } + // ready_queue_.insert(ldst_inst_ptr); + // ldst_inst_ptr->setInReadyQueue(true); + // ldst_inst_ptr->setState(LoadStoreInstInfo::IssueState::READY); + } + + // Arbitrate instruction issue from ldst_inst_queue + VLSU::LoadStoreInstInfoPtr VLSU::arbitrateInstIssue_() + { + sparta_assert(ready_queue_.size() > 0, "Arbitration fails: issue is empty!"); + + LoadStoreInstInfoPtr ready_inst_ = ready_queue_.top(); + ILOG("Arbitrating instruction, popping from queue: " << ready_inst_->getInstPtr()); + ready_queue_.pop(); + + return ready_inst_; + } + + // Check for ready to issue instructions + bool VLSU::isReadyToIssueInsts_() const + { + if (allow_speculative_load_exec_ && replay_buffer_.size() >= replay_buffer_size_) + { + ILOG("Replay buffer is full"); + return false; + } + + if (!ready_queue_.empty()) + { + return true; + } + + ILOG("No instructions are ready to be issued"); + + return false; + } + + // Update issue priority when newly dispatched instruction comes in + void VLSU::updateIssuePriorityAfterNewDispatch_( + const LoadStoreInstInfoPtr & load_store_inst_info_ptr) + { + ILOG("Issue priority new dispatch " << load_store_inst_info_ptr + << load_store_inst_info_ptr->getInstPtr()); + for (auto & inst_info_ptr : mem_request_queue_) + { + if (inst_info_ptr->getMemoryAccessInfoPtr()->getVAddr() + == load_store_inst_info_ptr->getMemoryAccessInfoPtr()->getVAddr() + && inst_info_ptr->getInstPtr() == load_store_inst_info_ptr->getInstPtr()) + { + inst_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + inst_info_ptr->setPriority(LoadStoreInstInfo::IssuePriority::NEW_DISP); + // NOTE: + // IssuePriority should always be updated before a new issue event is scheduled. 
+ // This guarantees that whenever a new instruction issue event is scheduled: + // (1)Instruction issue queue already has "something READY"; + // (2)Instruction issue arbitration is guaranteed to be sucessful. + // Update instruction status + inst_info_ptr->setVLSUStatusState(Inst::Status::SCHEDULED); + if (inst_info_ptr->getInstPtr()->getStatus() != Inst::Status::SCHEDULED) + { + inst_info_ptr->getInstPtr()->setStatus(Inst::Status::SCHEDULED); + } + return; + } + } + + sparta_assert( + false, "Attempt to update issue priority for instruction not yet in the issue queue!"); + } + + // Update issue priority after tlb reload + void VLSU::updateIssuePriorityAfterTLBReload_(const MemoryAccessInfoPtr & mem_access_info_ptr) + { + const InstPtr & inst_ptr = mem_access_info_ptr->getInstPtr(); + bool is_found = false; + for (auto & inst_info_ptr : mem_request_queue_) + { + const MemoryAccessInfoPtr & mem_info_ptr = inst_info_ptr->getMemoryAccessInfoPtr(); + if (mem_info_ptr->getMMUState() == MemoryAccessInfo::MMUState::MISS) + { + // Re-activate all TLB-miss-pending instructions in the issue queue + if (!allow_speculative_load_exec_) // Speculative misses are marked as not ready and + // replay event would set them back to ready + { + inst_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + } + inst_info_ptr->setPriority(LoadStoreInstInfo::IssuePriority::MMU_PENDING); + } + // NOTE: + // We may not have to re-activate all of the pending MMU miss instruction here + // However, re-activation must be scheduled somewhere else + + if (inst_info_ptr->getInstPtr() == inst_ptr) + { + // Update issue priority for this outstanding TLB miss + if (inst_info_ptr->getState() != LoadStoreInstInfo::IssueState::ISSUED) + { + inst_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + } + inst_info_ptr->setPriority(LoadStoreInstInfo::IssuePriority::MMU_RELOAD); + uev_append_ready_.preparePayload(inst_info_ptr)->schedule(sparta::Clock::Cycle(0)); + + // NOTE: + // The priority should be set in such a way that + // the outstanding miss is always re-issued earlier than other pending miss + // Here we have MMU_RELOAD > MMU_PENDING + + is_found = true; + } + } + + sparta_assert(inst_ptr->getFlushedStatus() || is_found, + "Attempt to rehandle TLB lookup for instruction not yet in the issue queue! " + << inst_ptr); + } + + // Update issue priority after cache reload + void VLSU::updateIssuePriorityAfterCacheReload_(const MemoryAccessInfoPtr & mem_access_info_ptr) + { + const InstPtr & inst_ptr = mem_access_info_ptr->getInstPtr(); + + sparta_assert(inst_ptr->getFlushedStatus() == false, + "Attempt to rehandle cache lookup for flushed instruction!"); + + const LoadStoreInstIterator & iter = mem_access_info_ptr->getIssueQueueIterator(); + sparta_assert( + iter.isValid(), + "Attempt to rehandle cache lookup for instruction not yet in the issue queue! 
" + << mem_access_info_ptr); + + const LoadStoreInstInfoPtr & inst_info_ptr = *(iter); + + // Update issue priority for this outstanding cache miss + if (inst_info_ptr->getState() != LoadStoreInstInfo::IssueState::ISSUED) + { + inst_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + } + inst_info_ptr->setPriority(LoadStoreInstInfo::IssuePriority::CACHE_RELOAD); + uev_append_ready_.preparePayload(inst_info_ptr)->schedule(sparta::Clock::Cycle(0)); + } + + // Update issue priority after store instruction retires + void VLSU::updateIssuePriorityAfterStoreInstRetire_(const LoadStoreInstInfoPtr & inst_ptr) + { + if (inst_ptr->getInstPtr()->isVector()) + { + for (auto & inst_info_ptr : mem_request_queue_) + { + if (inst_info_ptr->getMemoryAccessInfoPtr()->getVAddr() + == inst_ptr->getMemoryAccessInfoPtr()->getVAddr()) + { + + if (inst_info_ptr->getState() + != LoadStoreInstInfo::IssueState::ISSUED) // Speculative misses are marked + // as not ready and replay event + // would set them back to ready + { + inst_info_ptr->setState(LoadStoreInstInfo::IssueState::READY); + } + inst_info_ptr->setPriority(LoadStoreInstInfo::IssuePriority::CACHE_PENDING); + uev_append_ready_.preparePayload(inst_info_ptr) + ->schedule(sparta::Clock::Cycle(0)); + + return; + } + } + + sparta_assert( + false, + "Attempt to update issue priority for instruction not yet in the issue queue!"); + } + } + + bool VLSU::olderStoresExists_(const InstPtr & inst_ptr) + { + for (const auto & ldst_inst : mem_request_queue_) + { + const auto & ldst_inst_ptr = ldst_inst->getInstPtr(); + if (ldst_inst_ptr->isStoreInst() + && ldst_inst_ptr->getUniqueID() < inst_ptr->getUniqueID()) + { + return true; + } + } + return false; + } + + // Flush instruction issue queue + void VLSU::flushIssueQueue_(const FlushCriteria & criteria) + { + uint32_t credits_to_send = 0; + + auto iter = mem_request_queue_.begin(); + while (iter != mem_request_queue_.end()) + { + auto inst_ptr = (*iter)->getInstPtr(); + + auto delete_iter = iter++; + + if (criteria.includedInFlush(inst_ptr)) + { + mem_request_queue_.erase(delete_iter); + + // Clear any scoreboard callback + std::vector reg_files = {core_types::RF_INTEGER, + core_types::RF_FLOAT}; + for (const auto rf : reg_files) + { + scoreboard_views_[rf]->clearCallbacks(inst_ptr->getUniqueID()); + } + + // NOTE: + // We cannot increment iter after erase because it's already invalidated by then + + ++credits_to_send; + + ILOG("Flush Instruction ID: " << inst_ptr->getUniqueID()); + } + } + + if (credits_to_send > 0) + { + out_vlsu_credits_.send(credits_to_send); + + ILOG("Flush " << credits_to_send << " instructions in issue queue!"); + } + } + + // Flush load/store pipe + void VLSU::flushLSPipeline_(const FlushCriteria & criteria) + { + uint32_t stage_id = 0; + for (auto iter = ldst_pipeline_.begin(); iter != ldst_pipeline_.end(); iter++, stage_id++) + { + // If the pipe stage is already invalid, no need to criteria + if (!iter.isValid()) + { + continue; + } + + auto inst_ptr = (*iter)->getInstPtr(); + if (criteria.includedInFlush(inst_ptr)) + { + ldst_pipeline_.flushStage(iter); + + ILOG("Flush Pipeline Stage[" << stage_id + << "], Instruction ID: " << inst_ptr->getUniqueID()); + } + } + } + + void VLSU::flushReadyQueue_(const FlushCriteria & criteria) + { + auto iter = ready_queue_.begin(); + while (iter != ready_queue_.end()) + { + auto inst_ptr = (*iter)->getInstPtr(); + + auto delete_iter = iter++; + + if (criteria.includedInFlush(inst_ptr)) + { + ready_queue_.erase(delete_iter); + ILOG("Flushing from 
ready queue - Instruction ID: " << inst_ptr->getUniqueID()); + } + } + } + + void VLSU::flushReplayBuffer_(const FlushCriteria & criteria) + { + auto iter = replay_buffer_.begin(); + while (iter != replay_buffer_.end()) + { + auto inst_ptr = (*iter)->getInstPtr(); + + auto delete_iter = iter++; + + if (criteria.includedInFlush(inst_ptr)) + { + replay_buffer_.erase(delete_iter); + ILOG("Flushing from replay buffer - Instruction ID: " << inst_ptr->getUniqueID()); + } + } + } +} // namespace olympia diff --git a/core/VLSU.hpp b/core/VLSU.hpp new file mode 100644 index 00000000..940f0d65 --- /dev/null +++ b/core/VLSU.hpp @@ -0,0 +1,360 @@ + +#pragma once + +#include "sparta/ports/PortSet.hpp" +#include "sparta/ports/SignalPort.hpp" +#include "sparta/ports/DataPort.hpp" +#include "sparta/events/EventSet.hpp" +#include "sparta/events/UniqueEvent.hpp" +#include "sparta/simulation/Unit.hpp" +#include "sparta/simulation/ParameterSet.hpp" +#include "sparta/simulation/TreeNode.hpp" +#include "sparta/collection/Collectable.hpp" +#include "sparta/events/StartupEvent.hpp" +#include "sparta/resources/Pipeline.hpp" +#include "sparta/resources/Buffer.hpp" +#include "sparta/resources/PriorityQueue.hpp" +#include "sparta/pairs/SpartaKeyPairs.hpp" +#include "sparta/simulation/State.hpp" +#include "sparta/utils/SpartaSharedPointer.hpp" +#include "sparta/utils/LogUtils.hpp" +#include "sparta/resources/Scoreboard.hpp" + +#include "cache/TreePLRUReplacement.hpp" + +#include "Inst.hpp" +#include "CoreTypes.hpp" +#include "FlushManager.hpp" +#include "CacheFuncModel.hpp" +#include "MemoryAccessInfo.hpp" +#include "LoadStoreInstInfo.hpp" +#include "MMU.hpp" +#include "DCache.hpp" + +namespace olympia +{ + class VLSU : public sparta::Unit + { + public: + /*! + * \class VLSUParameterSet + * \brief Parameters for VLSU model + */ + class VLSUParameterSet : public sparta::ParameterSet + { + public: + //! Constructor for VLSUParameterSet + VLSUParameterSet(sparta::TreeNode* n) : sparta::ParameterSet(n) {} + + // Parameters for ldst_inst_queue + PARAMETER(uint32_t, mem_request_queue_size, 8, "VLSU mem request queue size") + PARAMETER(uint32_t, inst_queue_size, 8, "VLSU inst queue size") + PARAMETER(uint32_t, replay_buffer_size, mem_request_queue_size, "Replay buffer size") + PARAMETER(uint32_t, replay_issue_delay, 3, "Replay Issue delay") + // VLSU microarchitecture parameters + PARAMETER( + bool, allow_speculative_load_exec, true, + "Allow loads to proceed speculatively before all older store addresses are known") + // Pipeline length + PARAMETER(uint32_t, mmu_lookup_stage_length, 1, "Length of the mmu lookup stage") + PARAMETER(uint32_t, cache_lookup_stage_length, 1, "Length of the cache lookup stage") + PARAMETER(uint32_t, cache_read_stage_length, 1, "Length of the cache read stage") + PARAMETER(uint32_t, data_width, 64, "Number of bits load/store per cycle") + }; + + /*! + * \brief Constructor for VLSU + * \note node parameter is the node that represent the VLSU and + * p is the VLSU parameter set + */ + VLSU(sparta::TreeNode* node, const VLSUParameterSet* p); + + //! Destroy the VLSU + ~VLSU(); + + //! name of this resource. 
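+ //! Used by the sparta resource factory when constructing this unit.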
+ static const char name[]; + + //////////////////////////////////////////////////////////////////////////////// + // Type Name/Alias Declaration + //////////////////////////////////////////////////////////////////////////////// + + using LoadStoreInstInfoPtr = sparta::SpartaSharedPointer; + using LoadStoreInstIterator = sparta::Buffer::const_iterator; + + using FlushCriteria = FlushManager::FlushingCriteria; + + private: + using ScoreboardViews = + std::array, core_types::N_REGFILES>; + + ScoreboardViews scoreboard_views_; + //////////////////////////////////////////////////////////////////////////////// + // Input Ports + //////////////////////////////////////////////////////////////////////////////// + sparta::DataInPort in_vlsu_insts_{&unit_port_set_, "in_vlsu_insts", + 1}; + + sparta::DataInPort in_rob_retire_ack_{&unit_port_set_, "in_rob_retire_ack", 1}; + + sparta::DataInPort in_reorder_flush_{&unit_port_set_, "in_reorder_flush", + sparta::SchedulingPhase::Flush, 1}; + + sparta::DataInPort in_mmu_lookup_req_{&unit_port_set_, + "in_mmu_lookup_req", 1}; + + sparta::DataInPort in_mmu_lookup_ack_{&unit_port_set_, + "in_mmu_lookup_ack", 0}; + + sparta::DataInPort in_cache_lookup_req_{&unit_port_set_, + "in_cache_lookup_req", 1}; + + sparta::DataInPort in_cache_lookup_ack_{&unit_port_set_, + "in_cache_lookup_ack", 0}; + + sparta::SignalInPort in_cache_free_req_{&unit_port_set_, "in_cache_free_req", 0}; + + sparta::SignalInPort in_mmu_free_req_{&unit_port_set_, "in_mmu_free_req", 0}; + + //////////////////////////////////////////////////////////////////////////////// + // Output Ports + //////////////////////////////////////////////////////////////////////////////// + sparta::DataOutPort out_vlsu_credits_{&unit_port_set_, "out_vlsu_credits"}; + + sparta::DataOutPort out_mmu_lookup_req_{&unit_port_set_, + "out_mmu_lookup_req", 0}; + + sparta::DataOutPort out_cache_lookup_req_{&unit_port_set_, + "out_cache_lookup_req", 0}; + + //////////////////////////////////////////////////////////////////////////////// + // Internal States + //////////////////////////////////////////////////////////////////////////////// + + // Issue Queue + using LoadStoreIssueQueue = sparta::Buffer; + // holds loadstoreinfo memory requests + LoadStoreIssueQueue mem_request_queue_; + // holds inst_ptrs until done + // one instruction can have multiple memory requests + InstQueue inst_queue_; + const uint32_t mem_request_queue_size_; + const uint32_t inst_queue_size_; + + sparta::Buffer replay_buffer_; + const uint32_t replay_buffer_size_; + const uint32_t replay_issue_delay_; + + sparta::PriorityQueue ready_queue_; + // MMU unit + bool mmu_busy_ = false; + + // L1 Data Cache + bool cache_busy_ = false; + + uint32_t data_width_; + + sparta::collection::Collectable cache_busy_collectable_{getContainer(), "dcache_busy", + &cache_busy_}; + + // LSInstInfo allocator + LoadStoreInstInfoAllocator & load_store_info_allocator_; + + // allocator for this object type + MemoryAccessInfoAllocator & memory_access_allocator_; + + // NOTE: + // Depending on which kind of cache (e.g. blocking vs. 
non-blocking) is being used + // This single slot could potentially be extended to a cache pending miss queue + + const int address_calculation_stage_; + const int mmu_lookup_stage_; + const int cache_lookup_stage_; + const int cache_read_stage_; + const int complete_stage_; + + // Load/Store Pipeline + using LoadStorePipeline = sparta::Pipeline; + LoadStorePipeline ldst_pipeline_; + + // VLSU Microarchitecture parameters + const bool allow_speculative_load_exec_; + + // ROB stopped simulation early, transactions could still be inflight. + bool rob_stopped_simulation_ = false; + + //////////////////////////////////////////////////////////////////////////////// + // Event Handlers + //////////////////////////////////////////////////////////////////////////////// + + // Event to issue instruction + sparta::UniqueEvent<> uev_issue_inst_{&unit_event_set_, "issue_inst", + CREATE_SPARTA_HANDLER(VLSU, issueInst_)}; + + sparta::UniqueEvent<> uev_gen_mem_ops_{&unit_event_set_, "gen_mem_ops", + CREATE_SPARTA_HANDLER(VLSU, memRequestGenerator_)}; + + sparta::PayloadEvent uev_replay_ready_{ + &unit_event_set_, "replay_ready", + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, replayReady_, LoadStoreInstInfoPtr)}; + + sparta::PayloadEvent uev_append_ready_{ + &unit_event_set_, "append_ready", + CREATE_SPARTA_HANDLER_WITH_DATA(VLSU, appendReady_, LoadStoreInstInfoPtr)}; + + //////////////////////////////////////////////////////////////////////////////// + // Callbacks + //////////////////////////////////////////////////////////////////////////////// + // Send initial credits (mem_request_queue_size_) to Dispatch Unit + void sendInitialCredits_(); + + // Setup Scoreboard Views + void setupScoreboard_(); + + // Receive new load/store Instruction from Dispatch Unit + void getInstsFromDispatch_(const InstPtr &); + + // Callback from Scoreboard to inform Operand Readiness + void handleOperandIssueCheck_(const LoadStoreInstInfoPtr & inst_ptr); + + // Receive update from ROB whenever store instructions retire + void getAckFromROB_(const InstPtr &); + + // Issue/Re-issue ready instructions in the issue queue + void issueInst_(); + + // Calculate memory load/store address + void handleAddressCalculation_(); + // Handle MMU access request + void handleMMULookupReq_(); + void handleMMUReadyReq_(const MemoryAccessInfoPtr & memory_access_info_ptr); + void getAckFromMMU_(const MemoryAccessInfoPtr & updated_memory_access_info_ptr); + + // Handle cache access request + void handleCacheLookupReq_(); + void handleCacheReadyReq_(const MemoryAccessInfoPtr & memory_access_info_ptr); + void getAckFromCache_(const MemoryAccessInfoPtr & updated_memory_access_info_ptr); + + // Perform cache read + void handleCacheRead_(); + // Retire load/store instruction + void completeInst_(); + + // Handle instruction flush in VLSU + void handleFlush_(const FlushCriteria &); + + // Instructions in the replay ready to issue + void replayReady_(const LoadStoreInstInfoPtr &); + + // Mark instruction as not ready and schedule replay ready + void updateInstReplayReady_(const LoadStoreInstInfoPtr &); + + // Instructions in the replay ready to issue + void appendReady_(const LoadStoreInstInfoPtr &); + + // Called when ROB terminates the simulation + void onROBTerminate_(const bool & val); + + // When simulation is ending (error or not), this function + // will be called + void onStartingTeardown_() override; + + // Typically called when the simulator is shutting down due to an exception + // writes out text to aid debug + // set as protected because VLSU 
dervies from LSU + void dumpDebugContent_(std::ostream & output) const override final; + + //////////////////////////////////////////////////////////////////////////////// + // Regular Function/Subroutine Call + //////////////////////////////////////////////////////////////////////////////// + + LoadStoreInstInfoPtr createLoadStoreInst_(const InstPtr & inst_ptr); + + void memRequestGenerator_(); + + void allocateInstToIssueQueue_(const InstPtr & inst_ptr); + + bool olderStoresExists_(const InstPtr & inst_ptr); + + bool allOlderStoresIssued_(const InstPtr & inst_ptr); + + void readyDependentLoads_(const LoadStoreInstInfoPtr &); + + bool instOperandReady_(const InstPtr &); + + void abortYoungerLoads_(const olympia::MemoryAccessInfoPtr & memory_access_info_ptr); + + // Remove instruction from pipeline which share the same address + void dropInstFromPipeline_(const LoadStoreInstInfoPtr &); + + // Append new store instruction into replay queue + void appendToReplayQueue_(const LoadStoreInstInfoPtr & inst_info_ptr); + + // Pop completed load/store instruction out of replay queue + void removeInstFromReplayQueue_(const LoadStoreInstInfoPtr & inst_to_remove); + void removeInstFromReplayQueue_(const InstPtr & inst_to_remove); + + void appendToReadyQueue_(const LoadStoreInstInfoPtr &); + + // Pop completed load/store instruction out of issue queue + void popIssueQueue_(const LoadStoreInstInfoPtr &); + + // Arbitrate instruction issue from ldst_inst_queue + LoadStoreInstInfoPtr arbitrateInstIssue_(); + + // Check for ready to issue instructions + bool isReadyToIssueInsts_() const; + + // Update issue priority after dispatch + void updateIssuePriorityAfterNewDispatch_(const LoadStoreInstInfoPtr &); + + // Update issue priority after TLB reload + void updateIssuePriorityAfterTLBReload_(const MemoryAccessInfoPtr &); + + // Update issue priority after cache reload + void updateIssuePriorityAfterCacheReload_(const MemoryAccessInfoPtr &); + + // Update issue priority after store instruction retires + void updateIssuePriorityAfterStoreInstRetire_(const LoadStoreInstInfoPtr &); + + // Flush instruction issue queue + void flushIssueQueue_(const FlushCriteria &); + + // Flush load/store pipeline + void flushLSPipeline_(const FlushCriteria &); + + // Flush Ready Queue + void flushReadyQueue_(const FlushCriteria &); + + // Flush Replay Buffer + void flushReplayBuffer_(const FlushCriteria &); + + void checkSQ_(); + + // Counters + sparta::Counter vlsu_insts_dispatched_{getStatisticSet(), "vlsu_insts_dispatched", + "Number of VLSU instructions dispatched", + sparta::Counter::COUNT_NORMAL}; + sparta::Counter stores_retired_{getStatisticSet(), "stores_retired", + "Number of stores retired", sparta::Counter::COUNT_NORMAL}; + sparta::Counter VLSU_insts_issued_{getStatisticSet(), "VLSU_insts_issued", + "Number of VLSU instructions issued", + sparta::Counter::COUNT_NORMAL}; + sparta::Counter replay_insts_{getStatisticSet(), "replay_insts_", + "Number of Replay instructions issued", + sparta::Counter::COUNT_NORMAL}; + sparta::Counter VLSU_insts_completed_{getStatisticSet(), "VLSU_insts_completed", + "Number of VLSU instructions completed", + sparta::Counter::COUNT_NORMAL}; + sparta::Counter VLSU_flushes_{getStatisticSet(), "VLSU_flushes", + "Number of instruction flushes at VLSU", + sparta::Counter::COUNT_NORMAL}; + + sparta::Counter biu_reqs_{getStatisticSet(), "biu_reqs", "Number of BIU reqs", + sparta::Counter::COUNT_NORMAL}; + + friend class VLSUTester; + }; + + class VLSUTester; +} // namespace olympia diff --git 
a/core/VectorUopGenerator.cpp b/core/VectorUopGenerator.cpp index 6726c3f0..e5d961ad 100644 --- a/core/VectorUopGenerator.cpp +++ b/core/VectorUopGenerator.cpp @@ -101,7 +101,7 @@ namespace olympia "Inst: " << current_inst_ << " uop gen type is none"); // Number of vector elements processed by each uop - const Inst::VCSRs * current_vcsrs = inst->getVCSRs(); + const Inst::VectorConfig * current_vcsrs = inst->getVectorConfig(); const uint64_t num_elems_per_uop = Inst::VLEN / current_vcsrs->sew; // TODO: For now, generate uops for all elements even if there is a tail num_uops_to_generate_ = std::ceil(current_vcsrs->vlmax / num_elems_per_uop); @@ -132,9 +132,10 @@ namespace olympia uop->setUniqueID(current_inst_->getUniqueID()); uop->setProgramID(current_inst_->getProgramID()); - const Inst::VCSRs * current_vcsrs = current_inst_->getVCSRs(); - uop->setVCSRs(current_vcsrs); + const Inst::VectorConfig * current_vcsrs = current_inst_->getVectorConfig(); + uop->setVectorConfigVCSRs(current_vcsrs); uop->setUOpID(num_uops_generated_); + uop->setVectorConfigVLSU(current_vcsrs); // Set weak pointer to parent vector instruction (first uop) sparta::SpartaWeakPointer parent_weak_ptr = current_inst_; @@ -229,4 +230,4 @@ namespace olympia reset_(); } } -} // namespace olympia +} // namespace olympia \ No newline at end of file diff --git a/test/core/dispatch/test_cores/test_big_core.yaml b/test/core/dispatch/test_cores/test_big_core.yaml index 4119f823..be7f46e9 100644 --- a/test/core/dispatch/test_cores/test_big_core.yaml +++ b/test/core/dispatch/test_cores/test_big_core.yaml @@ -30,29 +30,32 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + 
["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] diff --git a/test/core/dispatch/test_cores/test_medium_core.yaml b/test/core/dispatch/test_cores/test_medium_core.yaml index a723a59e..2ff81b80 100644 --- a/test/core/dispatch/test_cores/test_medium_core.yaml +++ b/test/core/dispatch/test_cores/test_medium_core.yaml @@ -30,26 +30,29 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/dispatch/test_cores/test_small_core.yaml b/test/core/dispatch/test_cores/test_small_core.yaml index 0d993d95..6e22dce7 100644 --- a/test/core/dispatch/test_cores/test_small_core.yaml +++ b/test/core/dispatch/test_cores/test_small_core.yaml @@ -23,23 +23,26 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 
1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/issue_queue/test_cores/test_big_core.yaml b/test/core/issue_queue/test_cores/test_big_core.yaml index 4119f823..be7f46e9 100644 --- a/test/core/issue_queue/test_cores/test_big_core.yaml +++ b/test/core/issue_queue/test_cores/test_big_core.yaml @@ -30,29 +30,32 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] diff --git a/test/core/issue_queue/test_cores/test_big_core_full.yaml b/test/core/issue_queue/test_cores/test_big_core_full.yaml index 18315cad..3217a943 100644 --- a/test/core/issue_queue/test_cores/test_big_core_full.yaml +++ b/test/core/issue_queue/test_cores/test_big_core_full.yaml @@ -30,29 +30,32 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", 
"iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] diff --git a/test/core/l2cache/expected_output/hit_case.out.EXPECTED b/test/core/l2cache/expected_output/hit_case.out.EXPECTED index 619d9ce5..abf1c13a 100644 --- a/test/core/l2cache/expected_output/hit_case.out.EXPECTED +++ b/test/core/l2cache/expected_output/hit_case.out.EXPECTED @@ -3,8 +3,8 @@ #Exe: #SimulatorVersion: #Repro: -#Start: Thursday Thu May 30 21:50:32 2024 -#Elapsed: 0.012536s +#Start: Thursday Thu Aug 1 09:28:43 2024 +#Elapsed: 0.012988s {0000000000 00000000 top.l2cache info} L2Cache: L2Cache construct: #4294967295 {0000000000 00000000 top.l2cache info} sendInitialCredits_: Sending initial credits to ICache : 8 {0000000000 00000000 top.l2cache info} sendInitialCredits_: Sending initial credits to DCache : 8 diff --git a/test/core/l2cache/expected_output/single_access.out.EXPECTED b/test/core/l2cache/expected_output/single_access.out.EXPECTED index 5af68b58..5864b3ef 100644 --- a/test/core/l2cache/expected_output/single_access.out.EXPECTED +++ b/test/core/l2cache/expected_output/single_access.out.EXPECTED @@ -3,8 +3,8 @@ #Exe: #SimulatorVersion: #Repro: -#Start: Thursday Thu May 30 21:50:19 2024 -#Elapsed: 0.015993s +#Start: Thursday Thu Aug 1 09:28:59 2024 +#Elapsed: 0.009035s {0000000000 00000000 top.l2cache info} L2Cache: L2Cache construct: #4294967295 {0000000000 00000000 top.l2cache info} sendInitialCredits_: Sending initial credits to ICache : 8 {0000000000 00000000 top.l2cache info} sendInitialCredits_: Sending initial credits to DCache : 8 diff --git a/test/core/lsu/test_cores/test_small_core.yaml b/test/core/lsu/test_cores/test_small_core.yaml index 0d993d95..6e22dce7 100644 --- a/test/core/lsu/test_cores/test_small_core.yaml +++ b/test/core/lsu/test_cores/test_small_core.yaml @@ -23,23 +23,26 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - 
["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/lsu/test_cores/test_small_core_full.yaml b/test/core/lsu/test_cores/test_small_core_full.yaml index 6ff1c99c..b59f583b 100644 --- a/test/core/lsu/test_cores/test_small_core_full.yaml +++ b/test/core/lsu/test_cores/test_small_core_full.yaml @@ -39,23 +39,26 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/rename/expected_output/big_core.out.EXPECTED b/test/core/rename/expected_output/big_core.out.EXPECTED index f954c737..a33f5112 100644 --- a/test/core/rename/expected_output/big_core.out.EXPECTED +++ b/test/core/rename/expected_output/big_core.out.EXPECTED @@ -3,8 +3,8 @@ #Exe: #SimulatorVersion: #Repro: -#Start: Tuesday Tue Jul 16 09:09:57 2024 -#Elapsed: 0.003622s +#Start: Wednesday Wed Jul 31 23:12:57 2024 +#Elapsed: 0.065286s {0000000000 00000000 top.dispatch info} Dispatch: mapping target: INTiq0 {0000000000 00000000 top.dispatch info} Dispatch: mapping target: DIViq0 {0000000000 00000000 top.dispatch info} Dispatch: mapping target: INTiq1 @@ -71,6 +71,7 @@ {0000000003 00000003 top.execute.iq0 info} handleOperandIssueCheck_: Sending to issue queue uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.rob info} robAppended_: retire appended: uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.dispatch info} dispatchQueueAppended_: queue appended: 0x00000000 UID(1) PID(2) add 
+{0000000003 00000003 top.rob info} retireInstructions_: Retiring {0000000003 00000003 top.rob info} retireInstructions_: num to retire: 1 {0000000003 00000003 top.rob info} retireInstructions_: set oldest: uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.execute.iq0 info} sendReadyInsts_: Sending instruction uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' to exe_pipe exe0 @@ -90,6 +91,7 @@ {0000000004 00000004 top.rob info} robAppended_: retire appended: uid: 1 DISPATCHED 0 pid: 2 uopid: 0 'add 4,3,2' {0000000004 00000004 top.dispatch info} dispatchQueueAppended_: queue appended: 0x00000000 UID(2) PID(3) mul {0000000004 00000004 top.execute.exe0 info} executeInst_: Executed inst: uid: 0 SCHEDULED 0 pid: 1 uopid: 0 'add 3,1,2' +{0000000004 00000004 top.rob info} retireInstructions_: Retiring {0000000004 00000004 top.rob info} retireInstructions_: num to retire: 2 {0000000004 00000004 top.dispatch info} dispatchInstructions_: Num to dispatch: 1 {0000000004 00000004 top.dispatch info} acceptInst: iq1: dispatching uid: 2 RENAMED 0 pid: 3 uopid: 0 'mul 13,12,11' diff --git a/test/core/rename/expected_output/big_core_small_rename.out.EXPECTED b/test/core/rename/expected_output/big_core_small_rename.out.EXPECTED index 395d3a52..20674fa4 100644 --- a/test/core/rename/expected_output/big_core_small_rename.out.EXPECTED +++ b/test/core/rename/expected_output/big_core_small_rename.out.EXPECTED @@ -3,8 +3,8 @@ #Exe: #SimulatorVersion: #Repro: -#Start: Tuesday Tue Jul 16 09:09:57 2024 -#Elapsed: 0.00475s +#Start: Wednesday Wed Jul 31 23:13:04 2024 +#Elapsed: 0.005986s {0000000000 00000000 top.dispatch info} Dispatch: mapping target: INTiq0 {0000000000 00000000 top.dispatch info} Dispatch: mapping target: DIViq0 {0000000000 00000000 top.dispatch info} Dispatch: mapping target: INTiq1 @@ -71,6 +71,7 @@ {0000000003 00000003 top.execute.iq0 info} handleOperandIssueCheck_: Sending to issue queue uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.rob info} robAppended_: retire appended: uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.dispatch info} dispatchQueueAppended_: queue appended: 0x00000000 UID(1) PID(2) add +{0000000003 00000003 top.rob info} retireInstructions_: Retiring {0000000003 00000003 top.rob info} retireInstructions_: num to retire: 1 {0000000003 00000003 top.rob info} retireInstructions_: set oldest: uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.execute.iq0 info} sendReadyInsts_: Sending instruction uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' to exe_pipe exe0 @@ -90,6 +91,7 @@ {0000000004 00000004 top.rob info} robAppended_: retire appended: uid: 1 DISPATCHED 0 pid: 2 uopid: 0 'add 4,3,2' {0000000004 00000004 top.dispatch info} dispatchQueueAppended_: queue appended: 0x00000000 UID(2) PID(3) mul {0000000004 00000004 top.execute.exe0 info} executeInst_: Executed inst: uid: 0 SCHEDULED 0 pid: 1 uopid: 0 'add 3,1,2' +{0000000004 00000004 top.rob info} retireInstructions_: Retiring {0000000004 00000004 top.rob info} retireInstructions_: num to retire: 2 {0000000004 00000004 top.dispatch info} dispatchInstructions_: Num to dispatch: 1 {0000000004 00000004 top.dispatch info} acceptInst: iq1: dispatching uid: 2 RENAMED 0 pid: 3 uopid: 0 'mul 13,12,11' diff --git a/test/core/rename/expected_output/medium_core.out.EXPECTED b/test/core/rename/expected_output/medium_core.out.EXPECTED index 5eb25f2f..0133a53d 100644 --- a/test/core/rename/expected_output/medium_core.out.EXPECTED +++ 
b/test/core/rename/expected_output/medium_core.out.EXPECTED @@ -3,8 +3,8 @@ #Exe: #SimulatorVersion: #Repro: -#Start: Tuesday Tue Jul 16 09:09:57 2024 -#Elapsed: 0.004244s +#Start: Wednesday Wed Jul 31 23:12:52 2024 +#Elapsed: 0.017697s {0000000000 00000000 top.dispatch info} Dispatch: mapping target: INTiq0 {0000000000 00000000 top.dispatch info} Dispatch: mapping target: MULiq0 {0000000000 00000000 top.dispatch info} Dispatch: mapping target: I2Fiq0 @@ -64,6 +64,7 @@ {0000000003 00000003 top.execute.iq0 info} handleOperandIssueCheck_: Sending to issue queue uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.rob info} robAppended_: retire appended: uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.dispatch info} dispatchQueueAppended_: queue appended: 0x00000000 UID(1) PID(2) add +{0000000003 00000003 top.rob info} retireInstructions_: Retiring {0000000003 00000003 top.rob info} retireInstructions_: num to retire: 1 {0000000003 00000003 top.rob info} retireInstructions_: set oldest: uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.execute.iq0 info} sendReadyInsts_: Sending instruction uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' to exe_pipe exe0 @@ -83,6 +84,7 @@ {0000000004 00000004 top.rob info} robAppended_: retire appended: uid: 1 DISPATCHED 0 pid: 2 uopid: 0 'add 4,3,2' {0000000004 00000004 top.dispatch info} dispatchQueueAppended_: queue appended: 0x00000000 UID(2) PID(3) mul {0000000004 00000004 top.execute.exe0 info} executeInst_: Executed inst: uid: 0 SCHEDULED 0 pid: 1 uopid: 0 'add 3,1,2' +{0000000004 00000004 top.rob info} retireInstructions_: Retiring {0000000004 00000004 top.rob info} retireInstructions_: num to retire: 2 {0000000004 00000004 top.dispatch info} dispatchInstructions_: Num to dispatch: 1 {0000000004 00000004 top.dispatch info} acceptInst: iq0: dispatching uid: 2 RENAMED 0 pid: 3 uopid: 0 'mul 13,12,11' diff --git a/test/core/rename/expected_output/small_core.out.EXPECTED b/test/core/rename/expected_output/small_core.out.EXPECTED index 37d343de..5b840851 100644 --- a/test/core/rename/expected_output/small_core.out.EXPECTED +++ b/test/core/rename/expected_output/small_core.out.EXPECTED @@ -3,8 +3,8 @@ #Exe: #SimulatorVersion: #Repro: -#Start: Tuesday Tue Jul 16 09:09:57 2024 -#Elapsed: 0.004802s +#Start: Wednesday Wed Jul 31 23:12:42 2024 +#Elapsed: 0.019088s {0000000000 00000000 top.dispatch info} Dispatch: mapping target: INTiq0 {0000000000 00000000 top.dispatch info} Dispatch: mapping target: MULiq0 {0000000000 00000000 top.dispatch info} Dispatch: mapping target: I2Fiq0 @@ -58,6 +58,7 @@ {0000000003 00000003 top.execute.iq0 info} handleOperandIssueCheck_: Sending to issue queue uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.rob info} robAppended_: retire appended: uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.dispatch info} dispatchQueueAppended_: queue appended: 0x00000000 UID(1) PID(2) add +{0000000003 00000003 top.rob info} retireInstructions_: Retiring {0000000003 00000003 top.rob info} retireInstructions_: num to retire: 1 {0000000003 00000003 top.rob info} retireInstructions_: set oldest: uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' {0000000003 00000003 top.execute.iq0 info} sendReadyInsts_: Sending instruction uid: 0 DISPATCHED 0 pid: 1 uopid: 0 'add 3,1,2' to exe_pipe exe0 @@ -77,6 +78,7 @@ {0000000004 00000004 top.rob info} robAppended_: retire appended: uid: 1 DISPATCHED 0 pid: 2 uopid: 0 'add 4,3,2' {0000000004 00000004 
top.dispatch info} dispatchQueueAppended_: queue appended: 0x00000000 UID(2) PID(3) mul {0000000004 00000004 top.execute.exe0 info} executeInst_: Executed inst: uid: 0 SCHEDULED 0 pid: 1 uopid: 0 'add 3,1,2' +{0000000004 00000004 top.rob info} retireInstructions_: Retiring {0000000004 00000004 top.rob info} retireInstructions_: num to retire: 2 {0000000004 00000004 top.dispatch info} dispatchInstructions_: Num to dispatch: 1 {0000000004 00000004 top.dispatch info} acceptInst: iq0: dispatching uid: 2 RENAMED 0 pid: 3 uopid: 0 'mul 13,12,11' diff --git a/test/core/rename/test_cores/test_big_core.yaml b/test/core/rename/test_cores/test_big_core.yaml index 4119f823..3526d736 100644 --- a/test/core/rename/test_cores/test_big_core.yaml +++ b/test/core/rename/test_cores/test_big_core.yaml @@ -30,29 +30,32 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/rename/test_cores/test_big_core_full.yaml b/test/core/rename/test_cores/test_big_core_full.yaml index 18315cad..5b263e9c 100644 --- a/test/core/rename/test_cores/test_big_core_full.yaml +++ b/test/core/rename/test_cores/test_big_core_full.yaml @@ -30,29 +30,32 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", 
"iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] diff --git a/test/core/rename/test_cores/test_big_core_small_rename.yaml b/test/core/rename/test_cores/test_big_core_small_rename.yaml index bf1aaf72..9670b4df 100644 --- a/test/core/rename/test_cores/test_big_core_small_rename.yaml +++ b/test/core/rename/test_cores/test_big_core_small_rename.yaml @@ -40,29 +40,32 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 
1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/rename/test_cores/test_big_core_small_rename_full.yaml b/test/core/rename/test_cores/test_big_core_small_rename_full.yaml index 9423dee8..0b70397a 100644 --- a/test/core/rename/test_cores/test_big_core_small_rename_full.yaml +++ b/test/core/rename/test_cores/test_big_core_small_rename_full.yaml @@ -40,29 +40,32 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/rename/test_cores/test_medium_core.yaml b/test/core/rename/test_cores/test_medium_core.yaml index a723a59e..2ff81b80 100644 --- a/test/core/rename/test_cores/test_medium_core.yaml +++ b/test/core/rename/test_cores/test_medium_core.yaml @@ -30,26 +30,29 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 
1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/rename/test_cores/test_medium_core_full.yaml b/test/core/rename/test_cores/test_medium_core_full.yaml index 36e40fe3..89314aed 100644 --- a/test/core/rename/test_cores/test_medium_core_full.yaml +++ b/test/core/rename/test_cores/test_medium_core_full.yaml @@ -38,26 +38,29 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4"], - ["lsu", 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4"], + ["lsu", 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/rename/test_cores/test_small_core.yaml b/test/core/rename/test_cores/test_small_core.yaml index 0d993d95..6e22dce7 100644 --- a/test/core/rename/test_cores/test_small_core.yaml +++ 
b/test/core/rename/test_cores/test_small_core.yaml @@ -23,23 +23,26 @@ top.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/rename/test_cores/test_small_core_full.yaml b/test/core/rename/test_cores/test_small_core_full.yaml index 927db2d4..e2d9253a 100644 --- a/test/core/rename/test_cores/test_small_core_full.yaml +++ b/test/core/rename/test_cores/test_small_core_full.yaml @@ -36,23 +36,26 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3"], - ["lsu", 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1]] \ No newline at end of file + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3"], + ["lsu", 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1]] \ No newline at end of file diff --git a/test/core/vector/CMakeLists.txt b/test/core/vector/CMakeLists.txt index 3fa1aa52..f20f0908 100644 --- a/test/core/vector/CMakeLists.txt +++ b/test/core/vector/CMakeLists.txt @@ -3,20 +3,24 @@ project(Vector_test) add_executable(Vector_test Vector_test.cpp ${SIM_BASE}/sim/OlympiaSim.cpp) target_link_libraries(Vector_test core common_test ${STF_LINK_LIBS} mavis SPARTA::sparta) 
+add_executable(VLSU_test VLSU_test.cpp ${SIM_BASE}/sim/OlympiaSim.cpp) +target_link_libraries(VLSU_test core common_test ${STF_LINK_LIBS} mavis SPARTA::sparta) + file(CREATE_LINK ${SIM_BASE}/mavis/json ${CMAKE_CURRENT_BINARY_DIR}/mavis_isa_files SYMBOLIC) file(CREATE_LINK ${SIM_BASE}/arches ${CMAKE_CURRENT_BINARY_DIR}/arches SYMBOLIC) file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/test_cores ${CMAKE_CURRENT_BINARY_DIR}/test_cores SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vsetivli_vaddvv_e8m4.json ${CMAKE_CURRENT_BINARY_DIR}/vsetivli_vaddvv_e8m4.json SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vsetvli_vaddvv_e32m1ta.json ${CMAKE_CURRENT_BINARY_DIR}/vsetvli_vaddvv_e32m1ta.json SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vsetvl_vaddvv_e64m1ta.json ${CMAKE_CURRENT_BINARY_DIR}/vsetvl_vaddvv_e64m1ta.json SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vsetivli_vaddvv_tail_e8m8ta.json ${CMAKE_CURRENT_BINARY_DIR}/vsetivli_vaddvv_tail_e8m8ta.json SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/multiple_vset.json ${CMAKE_CURRENT_BINARY_DIR}/multiple_vset.json SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vmulvx_e8m4.json ${CMAKE_CURRENT_BINARY_DIR}/vmulvx_e8m4.json SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vwmulvv_e8m4.json ${CMAKE_CURRENT_BINARY_DIR}/vwmulvv_e8m4.json SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vmseqvv_e8m4.json ${CMAKE_CURRENT_BINARY_DIR}/vmseqvv_e8m4.json SYMBOLIC) -file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vrgather.json ${CMAKE_CURRENT_BINARY_DIR}/vrgather.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vsetivli_vadd_lmul_4.json ${CMAKE_CURRENT_BINARY_DIR}/vsetivli_vadd_lmul_4.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vsetvl_vadd.json ${CMAKE_CURRENT_BINARY_DIR}/vsetvl_vadd.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vsetvli_vadd_sew_32.json ${CMAKE_CURRENT_BINARY_DIR}/vsetvli_vadd_sew_32.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vsetvli_vl_max_setting.json ${CMAKE_CURRENT_BINARY_DIR}/vsetvli_vl_max_setting.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/multiple_vset.json ${CMAKE_CURRENT_BINARY_DIR}/multiple_vset.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vmul_transfer.json ${CMAKE_CURRENT_BINARY_DIR}/vmul_transfer.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/undisturbed_checking.json ${CMAKE_CURRENT_BINARY_DIR}/undisturbed_checking.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vlsu_load_lmul_8.json ${CMAKE_CURRENT_BINARY_DIR}/vlsu_load_lmul_8.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vlsu_load_multiple.json ${CMAKE_CURRENT_BINARY_DIR}/vlsu_load_multiple.json SYMBOLIC) +file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/vlsu_store.json ${CMAKE_CURRENT_BINARY_DIR}/vlsu_store.json SYMBOLIC) @@ -30,3 +34,6 @@ sparta_named_test(Vector_test_vmulvv Vector_test big_core.out -c test_cor sparta_named_test(Vector_test_vmseqvv Vector_test big_core.out -c test_cores/test_big_core_full.yaml --input-file vmseqvv_e8m4.json) sparta_named_test(Vector_test_vmaccvv Vector_test big_core.out -c test_cores/test_big_core_full.yaml --input-file vmaccvv_e8m4.json) sparta_named_test(Vector_unsupported_test Vector_test big_core.out -c test_cores/test_big_core_full_8_decode.yaml --input-file vrgather.json) +sparta_named_test(VLSU_test_lmul VLSU_test big_core.out -c test_cores/test_big_core_full.yaml --input-file vlsu_load_lmul_8.json) +sparta_named_test(VLSU_test_load 
VLSU_test big_core.out -c test_cores/test_big_core_full.yaml --input-file vlsu_load_multiple.json) +sparta_named_test(VLSU_test_store VLSU_test big_core.out -c test_cores/test_big_core_full.yaml --input-file vlsu_store.json) \ No newline at end of file diff --git a/test/core/vector/VLSU_test.cpp b/test/core/vector/VLSU_test.cpp new file mode 100644 index 00000000..7531db5b --- /dev/null +++ b/test/core/vector/VLSU_test.cpp @@ -0,0 +1,125 @@ + +#include "CPUFactory.hpp" +#include "CoreUtils.hpp" +#include "Dispatch.hpp" +#include "MavisUnit.hpp" +#include "OlympiaAllocators.hpp" +#include "OlympiaSim.hpp" +#include "IssueQueue.hpp" +#include "test/core/dispatch/Dispatch_test.hpp" + +#include "sparta/app/CommandLineSimulator.hpp" +#include "sparta/app/Simulation.hpp" +#include "sparta/events/UniqueEvent.hpp" +#include "sparta/kernel/Scheduler.hpp" +#include "sparta/report/Report.hpp" +#include "sparta/resources/Buffer.hpp" +#include "sparta/simulation/ClockManager.hpp" +#include "sparta/sparta.hpp" +#include "sparta/statistics/StatisticSet.hpp" +#include "sparta/utils/SpartaSharedPointer.hpp" +#include "sparta/utils/SpartaTester.hpp" + +#include +#include +#include +#include +#include +TEST_INIT + +//////////////////////////////////////////////////////////////////////////////// +// Set up the Mavis decoder globally for the testing +olympia::InstAllocator inst_allocator(2000, 1000); + +const char USAGE[] = "Usage:\n" + " \n" + "\n"; + +sparta::app::DefaultValues DEFAULTS; +class olympia::VLSUTester +{ +public: + VLSUTester(olympia::VLSU * vlsu) : + vlsu_(vlsu) + {} + + void test_mem_request_count(const uint32_t expected_val) + { + EXPECT_TRUE(vlsu_->inst_queue_.read(0)->getCurrVLSUIters() == expected_val); + } + + +private: + olympia::VLSU * vlsu_; + +}; +void runTests(int argc, char **argv) { + DEFAULTS.auto_summary_default = "off"; + std::vector<std::string> datafiles; + std::string input_file; + bool enable_vector; + + sparta::app::CommandLineSimulator cls(USAGE, DEFAULTS); + auto &app_opts = cls.getApplicationOptions(); + app_opts.add_options()("output_file", + sparta::app::named_value<std::vector<std::string>>( + "output_file", &datafiles), + "Specifies the output file")( + "input-file", + sparta::app::named_value<std::string>("INPUT_FILE", &input_file) + ->default_value(""), + "Provide a JSON instruction stream", + "Provide a JSON file with instructions to run through Execute")( + "enable_vector", + sparta::app::named_value<bool>("enable_vector", &enable_vector) + ->default_value(false), + "Enable the experimental vector pipelines"); + + po::positional_options_description &pos_opts = cls.getPositionalOptions(); + pos_opts.add("output_file", -1); // example, look for the at the end + + int err_code = 0; + if (!cls.parse(argc, argv, err_code)) { + sparta_assert(false, + "Command line parsing failed"); // Any errors already printed to cerr + } + + sparta_assert(false == datafiles.empty(), + "Need an output file as the last argument of the test"); + + uint64_t ilimit = 0; + uint32_t num_cores = 1; + bool show_factories = false; + sparta::Scheduler scheduler; + OlympiaSim sim("simple", scheduler, + num_cores, // cores + input_file, ilimit, show_factories); + sparta::RootTreeNode *root_node = sim.getRoot(); + cls.populateSimulation(&sim); + olympia::VLSU *my_vlsu = root_node->getChild("cpu.core0.vlsu")->getResourceAs<olympia::VLSU>(); + olympia::VLSUTester vlsu_tester {my_vlsu}; + + if (input_file.find("vlsu_load_multiple.json") != std::string::npos) { + // Test VLSU + cls.runSimulator(&sim, 68); + vlsu_tester.test_mem_request_count(12); + + + } + else if 
(input_file.find("vlsu_store.json") != std::string::npos) { + // Test VLSU + cls.runSimulator(&sim, 41); + vlsu_tester.test_mem_request_count(16); + } + else{ + cls.runSimulator(&sim); + } +} + +int main(int argc, char **argv) { + runTests(argc, argv); + + REPORT_ERROR; + return (int)ERROR_CODE; +} diff --git a/test/core/vector/Vector_test.cpp b/test/core/vector/Vector_test.cpp index fe7e41bb..f29a9d05 100644 --- a/test/core/vector/Vector_test.cpp +++ b/test/core/vector/Vector_test.cpp @@ -56,27 +56,27 @@ class olympia::DecodeTester void test_vl(const uint32_t expected_vl) { - EXPECT_TRUE(decode_->VCSRs_.vl == expected_vl); + EXPECT_TRUE(decode_->VectorConfig_.vl == expected_vl); } void test_sew(const uint32_t expected_sew) { - EXPECT_TRUE(decode_->VCSRs_.sew == expected_sew); + EXPECT_TRUE(decode_->VectorConfig_.sew == expected_sew); } void test_lmul(const uint32_t expected_lmul) { - EXPECT_TRUE(decode_->VCSRs_.lmul == expected_lmul); + EXPECT_TRUE(decode_->VectorConfig_.lmul == expected_lmul); } void test_vlmax(const uint32_t expected_vlmax) { - EXPECT_TRUE(decode_->VCSRs_.vlmax == expected_vlmax); + EXPECT_TRUE(decode_->VectorConfig_.vlmax == expected_vlmax); } void test_vta(const bool expected_vta) { - EXPECT_TRUE(decode_->VCSRs_.vta == expected_vta); + EXPECT_TRUE(decode_->VectorConfig_.vta == expected_vta); } private: diff --git a/test/core/vector/test_cores/test_big_core_full.yaml b/test/core/vector/test_cores/test_big_core_full.yaml index 2ea2b8d1..1a6444ea 100644 --- a/test/core/vector/test_cores/test_big_core_full.yaml +++ b/test/core/vector/test_cores/test_big_core_full.yaml @@ -41,29 +41,32 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + 
["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] diff --git a/test/core/vector/test_cores/test_big_core_full_8_decode.yaml b/test/core/vector/test_cores/test_big_core_full_8_decode.yaml index 26363cfd..b9a787d0 100644 --- a/test/core/vector/test_cores/test_big_core_full_8_decode.yaml +++ b/test/core/vector/test_cores/test_big_core_full_8_decode.yaml @@ -41,29 +41,32 @@ top.cpu.core0.rename.scoreboards: # | # V integer.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] float.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] vector.params.latency_matrix: | - [["", "lsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], - ["lsu", 1, 1, 1, 1, 1, 1, 1], - ["iq0", 1, 1, 1, 1, 1, 1, 1], - ["iq1", 1, 1, 1, 1, 1, 1, 1], - ["iq2", 1, 1, 1, 1, 1, 1, 1], - ["iq3", 1, 1, 1, 1, 1, 1, 1], - ["iq4", 1, 1, 1, 1, 1, 1, 1], - ["iq5", 1, 1, 1, 1, 1, 1, 1]] + [["", "lsu", "vlsu", "iq0", "iq1", "iq2", "iq3", "iq4", "iq5"], + ["lsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["vlsu", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq0", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq1", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq2", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq3", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq4", 1, 1, 1, 1, 1, 1, 1, 1], + ["iq5", 1, 1, 1, 1, 1, 1, 1, 1]] diff --git a/test/core/vector/vlsu_load_lmul_8.json b/test/core/vector/vlsu_load_lmul_8.json new file mode 100644 index 00000000..6461c77e --- /dev/null +++ b/test/core/vector/vlsu_load_lmul_8.json @@ -0,0 +1,18 @@ +[ + { + "mnemonic": "vsetvl", + "rs1": 5, + "rd": 1, + "vtype": "0x3", + "vl": 128 + }, + { + "mnemonic": "vle8.v", + "rs1": 4, + "vd": 10, + "vaddr": "0xdeadbeef", + "mop": 0, + "eew": 8, + "stride": 8 + } +] \ No newline at end of file diff --git a/test/core/vector/vlsu_load_multiple.json b/test/core/vector/vlsu_load_multiple.json new file mode 100644 index 00000000..9059852c --- /dev/null +++ b/test/core/vector/vlsu_load_multiple.json @@ -0,0 +1,45 @@ +[ + { + "mnemonic": "vsetvl", + "rs1": 5, + "rd": 1, + "vtype": "0x0", + "vl": 128 + }, + { + "mnemonic": "vle8.v", + "rs1": 4, + "vd": 10, + "vaddr": "0xdeadbeef", + "mop": 0, + "eew": 8, + "stride": 8 + }, + { + "mnemonic": "vle8.v", + "rs1": 4, + "vd": 10, + "vaddr": "0xbeadbeef", + "mop": 0, + "eew": 8, + "stride": 8 + }, + { + "mnemonic": "vle8.v", + "rs1": 4, + "vd": 10, + "vaddr": "0xceeabeea", + "mop": 0, + "eew": 
8, + "stride": 8 + }, + { + "mnemonic": "vle8.v", + "rs1": 4, + "vd": 10, + "vaddr": "0xdeadbeef", + "mop": 0, + "eew": 8, + "stride": 8 + } +] \ No newline at end of file diff --git a/test/core/vector/vlsu_store.json b/test/core/vector/vlsu_store.json new file mode 100644 index 00000000..5dc0cfff --- /dev/null +++ b/test/core/vector/vlsu_store.json @@ -0,0 +1,36 @@ +[ + { + "mnemonic": "vsetvl", + "rs1": 5, + "rd": 1, + "vtype": "0x2", + "vl": 128 + }, + { + "mnemonic": "vse8.v", + "rs1": 4, + "vs3": 10, + "vaddr": "0xdeadbeef", + "mop": 0, + "eew": 8, + "stride": 8 + }, + { + "mnemonic": "vse8.v", + "rs1": 4, + "vs3": 10, + "vaddr": "0xdeadbeef", + "mop": 0, + "eew": 8, + "stride": 8 + }, + { + "mnemonic": "vse8.v", + "rs1": 4, + "vs3": 10, + "vaddr": "0xdeadbeef", + "mop": 0, + "eew": 8, + "stride": 8 + } +] \ No newline at end of file
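Note on the numbers the new VLSU tests check (a minimal sketch, not part of the diff itself): after this change each vector instruction carries a VectorConfig (vl, sew, lmul, vlmax), and VectorUopGenerator splits it into uops that each cover VLEN / sew elements, generating ceil(vlmax / elems_per_uop) uops; the VLSU then appears to walk each vector load or store in per-iteration memory requests, which is the count VLSUTester reads back through getCurrVLSUIters(). The standalone C++ sketch below mirrors that arithmetic under an assumed VLEN of 256 bits; the constant, struct, and function names here are illustrative only and do not exist in the Olympia sources.

#include <cmath>
#include <cstdint>
#include <cstdio>

namespace
{
    // Assumed vector register length in bits; Olympia's actual Inst::VLEN may differ.
    constexpr uint32_t kVlenBits = 256;

    // Hypothetical stand-in for the vl/sew/lmul/vlmax bundle a decoded vector op carries.
    struct VectorConfig
    {
        uint32_t vl;
        uint32_t sew;
        uint32_t lmul;
        uint32_t vlmax;
    };

    // Mirrors the generator's arithmetic: each uop covers VLEN / sew elements, and
    // enough uops are generated to cover vlmax (tail elements still included for now).
    uint32_t uopsPerVectorInst(const VectorConfig & cfg)
    {
        const uint32_t elems_per_uop = kVlenBits / cfg.sew;
        return static_cast<uint32_t>(
            std::ceil(static_cast<double>(cfg.vlmax) / static_cast<double>(elems_per_uop)));
    }
} // namespace

int main()
{
    // e8/m8, matching the vsetvl + vle8.v pair in vlsu_load_lmul_8.json:
    // vlmax = (VLEN / sew) * lmul = (256 / 8) * 8 = 256 under the assumed VLEN.
    const VectorConfig e8m8{128 /*vl*/, 8 /*sew*/, 8 /*lmul*/, (kVlenBits / 8) * 8};
    std::printf("e8/m8:  %u uops per vle8.v\n", uopsPerVectorInst(e8m8));   // prints 8

    // A SEW=32, LMUL=1 example: one uop is enough to cover vlmax.
    const VectorConfig e32m1{8 /*vl*/, 32, 1, (kVlenBits / 32) * 1};
    std::printf("e32/m1: %u uops per vadd.vv\n", uopsPerVectorInst(e32m1)); // prints 1
    return 0;
}

With these assumptions, the e8/m8 configuration used by vlsu_load_lmul_8.json yields vlmax = 256 and 8 uops per vle8.v. The exact memory-request counts asserted above (12 and 16) additionally depend on the VLSU's configured memory width and on the cycle at which the test samples the instruction queue, so they are not reproduced by this sketch.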