diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 0b2daaac..32743498 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -5,6 +5,7 @@ add_library(core SimpleBranchPred.cpp Fetch.cpp Decode.cpp + VectorUopGenerator.cpp Rename.cpp Dispatch.cpp Dispatcher.cpp diff --git a/core/CPUFactories.hpp b/core/CPUFactories.hpp index bf0799d5..1a875ea9 100644 --- a/core/CPUFactories.hpp +++ b/core/CPUFactories.hpp @@ -7,6 +7,7 @@ #include "Core.hpp" #include "Fetch.hpp" #include "Decode.hpp" +#include "VectorUopGenerator.hpp" #include "Rename.hpp" #include "Dispatch.hpp" #include "Execute.hpp" @@ -47,6 +48,10 @@ namespace olympia{ sparta::ResourceFactory decode_rf; + //! \brief Resource Factory to build a VectorUopGenerator + sparta::ResourceFactory vec_uop_gen_rf; + //! \brief Resource Factory to build a Rename Unit RenameFactory rename_rf; @@ -56,7 +61,6 @@ namespace olympia{ //! \brief Resource Factory to build a Execute Unit ExecuteFactory execute_rf; - //! \brief Resource Factory to build a MMU Unit sparta::ResourceFactory dcache_rf; diff --git a/core/CPUTopology.cpp b/core/CPUTopology.cpp index 7e08e396..d8fdb0a1 100644 --- a/core/CPUTopology.cpp +++ b/core/CPUTopology.cpp @@ -43,6 +43,14 @@ olympia::CoreTopologySimple::CoreTopologySimple(){ sparta::TreeNode::GROUP_IDX_NONE, &factories->decode_rf }, + { + "vec_uop_gen", + "cpu.core*.decode", + "Vector Uop Generator", + sparta::TreeNode::GROUP_NAME_NONE, + sparta::TreeNode::GROUP_IDX_NONE, + &factories->vec_uop_gen_rf + }, { "rename", "cpu.core*", diff --git a/core/Decode.cpp b/core/Decode.cpp index acd6b692..7186aa13 100644 --- a/core/Decode.cpp +++ b/core/Decode.cpp @@ -1,11 +1,11 @@ // -*- C++ -*- #include "Decode.hpp" +#include "VectorUopGenerator.hpp" #include "fusion/FusionTypes.hpp" #include "sparta/events/StartupEvent.hpp" #include "sparta/utils/LogUtils.hpp" -#include "MavisUnit.hpp" #include #include @@ -78,6 +78,12 @@ namespace olympia mavis_vsetvl_uid_ = mavis_facade_->lookupInstructionUniqueID("vsetvl"); mavis_vsetivli_uid_ = mavis_facade_->lookupInstructionUniqueID("vsetivli"); mavis_vsetvli_uid_ = mavis_facade_->lookupInstructionUniqueID("vsetvli"); + + // Get pointer to the vector uop generator + sparta::TreeNode * root_node = getContainer()->getRoot(); + vec_uop_gen_ = \ + root_node->getChild("cpu.core0.decode.vec_uop_gen")->getResourceAs(); + vec_uop_gen_->setMavis(mavis_facade_); } // ------------------------------------------------------------------- @@ -125,19 +131,38 @@ namespace olympia } } - // process vset settings being forward from execution pipe - // for set instructions that depend on register - void Decode::process_vset_(const InstPtr & inst) + void Decode::updateVcsrs_(const InstPtr & inst) { VCSRs_.setVCSRs(inst->getVL(), inst->getSEW(), inst->getLMUL(), inst->getVTA()); - // AVL setting to VLMAX if rs1 = 0 and rd != 0 - if (inst->getSourceOpInfoList()[0].field_value == 0 - && inst->getDestOpInfoList()[0].field_value != 0) + + const uint64_t uid = inst->getOpCodeInfo()->getInstructionUniqueID(); + if ((uid == mavis_vsetvli_uid_) && inst->hasZeroRegSource()) { - // set vl to vlmax, no need to block, vsetvli when rs1 is 0 - // so we set VL to 0 on setVCSRs_() - VCSRs_.vl = VCSRs_.vlmax; + // If rs1 is x0 and rd is x0 then the vl is unchanged (assuming it is legal) + VCSRs_.vl = inst->hasZeroRegDest() ? std::min(VCSRs_.vl, VCSRs_.vlmax) + : VCSRs_.vlmax; } + + ILOG("Processing vset{i}vl{i} instruction: " << inst); + ILOG(" LMUL: " << VCSRs_.lmul); + ILOG(" SEW: " << VCSRs_.sew); + ILOG(" VTA: " << VCSRs_.vta); + ILOG(" VLMAX: " << VCSRs_.vlmax); + ILOG(" VL: " << VCSRs_.vl); + + // Check validity of vector config + sparta_assert(VCSRs_.lmul <= 8, + "LMUL (" << VCSRs_.lmul << ") cannot be greater than " << 8); + sparta_assert(VCSRs_.vl <= VCSRs_.vlmax, + "VL (" << VCSRs_.vl << ") cannot be greater than VLMAX ("<< VCSRs_.vlmax << ")"); + } + + // process vset settings being forward from execution pipe + // for set instructions that depend on register + void Decode::process_vset_(const InstPtr & inst) + { + updateVcsrs_(inst); + // if rs1 != 0, VL = x[rs1], so we assume there's an STF field for VL if (waiting_on_vset_) { @@ -153,19 +178,22 @@ namespace olympia ILOG("Got a flush call for " << criteria); fetch_queue_credits_outp_.send(fetch_queue_.size()); fetch_queue_.clear(); + + // Reset the vector uop generator + vec_uop_gen_->handleFlush(criteria); } // Decode instructions void Decode::decodeInsts_() { - uint32_t num_decode = std::min(uop_queue_credits_, fetch_queue_.size() + uop_queue_.size()); - num_decode = std::min(num_decode, num_to_decode_); + uint32_t num_to_decode = std::min(uop_queue_credits_, fetch_queue_.size() + uop_queue_.size()); + num_to_decode = std::min(num_to_decode, num_to_decode_); // buffer to maximize the chances of a group match limited // by max allowed latency, bounded by max group size if (fusion_enable_) { - if (num_decode < fusion_max_group_size_ && latency_count_ < fusion_max_latency_) + if (num_to_decode < fusion_max_group_size_ && latency_count_ < fusion_max_latency_) { ++latency_count_; return; @@ -174,194 +202,149 @@ namespace olympia latency_count_ = 0; - if (num_decode > 0 && !waiting_on_vset_) - { - InstGroupPtr insts = - sparta::allocate_sparta_shared_pointer(instgroup_allocator); + // For fusion + InstUidListType uids; - InstUidListType uids; - // Send instructions on their way to rename - for (uint32_t i = 0; i < num_decode; ++i) + // Send instructions on their way to rename + InstGroupPtr insts = + sparta::allocate_sparta_shared_pointer(instgroup_allocator); + // if we have a waiting on vset followed by more instructions, we decode + // vset and stall anything else + while ((insts->size() < num_to_decode) && !waiting_on_vset_) + { + if (uop_queue_.size() > 0) { - if (uop_queue_.size() > 0) + const auto & inst = uop_queue_.read(0); + insts->emplace_back(inst); + inst->setStatus(Inst::Status::DECODED); + ILOG("From UOp Queue Decoded: " << inst); + uop_queue_.pop(); + } + else if (fetch_queue_.size() > 0) + { + sparta_assert(fetch_queue_.size() > 0, + "Cannot read from the fetch queue because it is empty!"); + auto & inst = fetch_queue_.read(0); + + // for vector instructions, we block on vset and do not allow any other + // processing of instructions until the vset is resolved optimizations could be + // to allow scalar operations to move forward until a subsequent vector + // instruction is detected or do vset prediction + + // vsetvl always block + // vsetvli only blocks if rs1 is not x0 + // vsetivli never blocks + const uint64_t uid = inst->getOpCodeInfo()->getInstructionUniqueID(); + if ((uid == mavis_vsetivli_uid_) || + ((uid == mavis_vsetvli_uid_) && inst->hasZeroRegSource())) + { + updateVcsrs_(inst); + } + else if (uid == mavis_vsetvli_uid_ || uid == mavis_vsetvl_uid_) { - const auto & inst = uop_queue_.read(0); - insts->emplace_back(inst); - inst->setStatus(Inst::Status::DECODED); - ILOG("From UOp Queue Decoded: " << inst); - uop_queue_.pop(); + // block for vsetvl or vsetvli when rs1 of vsetvli is NOT 0 + waiting_on_vset_ = true; + // need to indicate we want a signal sent back at execute + inst->setBlockingVSET(true); + ILOG("Decode stalled, Waiting on vset that has register dependency: " << inst) } else { - auto & inst = fetch_queue_.read(0); - - // for vector instructions, we block on vset and do not allow any other - // processing of instructions until the vset is resolved optimizations could be - // to allow scalar operations to move forward until a subsequent vector - // instruction is detected or do vset prediction - - // the only two vset instructions that block are vsetvl or vsetvli, - // because both depend on register value - if (inst->getOpCodeInfo()->getInstructionUniqueID() == mavis_vsetivli_uid_) - { - // vsetivli with immediates, we can set at decode and continue to process - // instruction group, no vset stall - VCSRs_.setVCSRs(inst->getVL(), inst->getSEW(), inst->getLMUL(), - inst->getVTA()); - ILOG("Setting vset from VSETIVLI, LMUL: " << VCSRs_.lmul << " SEW: " << VCSRs_.sew - << " VTA: " << VCSRs_.vta - << " VL: " << VCSRs_.vl); - } - else if (inst->getOpCodeInfo()->getInstructionUniqueID() == mavis_vsetvl_uid_ - || inst->getOpCodeInfo()->getInstructionUniqueID() - == mavis_vsetvli_uid_) + if (!inst->isVset() && inst->isVector()) { - // block for vsetvl or vsetvli when rs1 of vsetvli is NOT 0 - waiting_on_vset_ = true; - // need to indicate we want a signal sent back at execute - inst->setBlockingVSET(true); - ILOG("Decode stall due to vset dependency: " << inst); + // set LMUL, VSET, VL, VTA for any other vector instructions + inst->setVCSRs(&VCSRs_); } - else + } + + ILOG("Decoded: " << inst); + insts->emplace_back(inst); + inst->setStatus(Inst::Status::DECODED); + + // Handle vector uop generation + if (inst->isVector() && !inst->isVset()) + { + // If LMUL > 1, fracture instruction into UOps + ILOG("Vector uop gen: " << inst); + vec_uop_gen_->setInst(inst); + + // Original instruction will act as the first UOp + inst->setUOpID(0); // set UOpID() + + while(vec_uop_gen_->getNumUopsRemaining() > 1) { - if (!inst->isVset() && inst->isVector()) + const InstPtr uop = vec_uop_gen_->generateUop(); + if (insts->size() < num_to_decode_) { - // set LMUL, VSET, VL, VTA for any other vector instructions - inst->setVCSRs(VCSRs_); + insts->emplace_back(uop); + uop->setStatus(Inst::Status::DECODED); } - } - if (inst->getLMUL() > 1 && !inst->isVset() && inst->isVector()) - { - // update num_decode based on UOp count as well - num_decode = - std::min(uop_queue_credits_, - fetch_queue_.size() + uop_queue_.size() + inst->getLMUL() - 1); - num_decode = std::min(num_decode, num_to_decode_); - // lmul > 1, fracture instruction into UOps - inst->setUOpID(0); // set UOpID() - ILOG("Inst: " << inst << " is being split into " << VCSRs_.lmul << " UOPs"); - - insts->emplace_back(inst); - inst->setStatus(Inst::Status::DECODED); - inst->setUOpCount(VCSRs_.lmul); - inst->setLMUL(1); // setting LMUL to 1 due to UOp fracture - fetch_queue_.pop(); - for (uint32_t j = 1; j < VCSRs_.lmul; ++j) + else { - i++; // increment decode count to account for UOps - // we create lmul - 1 instructions, because the original instruction - // will also be executed, so we start creating UOPs at vector - // registers + 1 until LMUL - const std::string mnemonic = inst->getMnemonic(); - auto srcs = inst->getSourceOpInfoList(); - for (auto & src : srcs) - { - src.field_value += j; - } - auto dests = inst->getDestOpInfoList(); - for (auto & dest : dests) - { - dest.field_value += j; - } - const auto imm = inst->getImmediate(); - mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests, imm); - InstPtr new_inst = mavis_facade_->makeInstDirectly(ex_info, getClock()); - // setting UOp instructions to have the same UID and PID as parent - // instruction - new_inst->setUniqueID(inst->getUniqueID()); - new_inst->setProgramID(inst->getProgramID()); - InstPtr inst_uop_ptr(new Inst(*new_inst)); - inst_uop_ptr->setVCSRs(VCSRs_); - inst_uop_ptr->setUOpID(j); - inst_uop_ptr->setLMUL(1); // setting LMUL to 1 due to UOp fracture - sparta::SpartaWeakPointer weak_ptr_inst = inst; - inst_uop_ptr->setUOpParent(weak_ptr_inst); - if (i < num_decode) - { - inst_uop_ptr->setTail(VCSRs_.vl / VCSRs_.sew < std::max( - Inst::VLEN / VCSRs_.sew, VCSRs_.vlmax)); - insts->emplace_back(inst_uop_ptr); - inst_uop_ptr->setStatus(Inst::Status::DECODED); - } - else - { - ILOG("Not enough decode credits to process UOp, appending to " - "uop_queue_ " - << inst_uop_ptr); - uop_queue_.push(inst_uop_ptr); - } + ILOG("Not enough decode credits to process UOp, " + "appending to the uop queue: " << uop); + uop_queue_.push(uop); } } - else - { - inst->setTail(VCSRs_.vl / VCSRs_.sew - < std::max(Inst::VLEN / VCSRs_.sew, VCSRs_.vlmax)); - insts->emplace_back(inst); - inst->setStatus(Inst::Status::DECODED); - - if (fusion_enable_) - { - uids.push_back(inst->getMavisUid()); - } - - ILOG("Decoded: " << inst); + } - fetch_queue_.pop(); - if (waiting_on_vset_) - { - // if we have a waiting on vset followed by more instructions, we decode - // vset and stall anything else - break; - } - } + if (fusion_enable_) + { + uids.push_back(inst->getMavisUid()); } + + // Remove from Fetch Queue + fetch_queue_.pop(); } - if (fusion_enable_) + else { - MatchInfoListType matches; - uint32_t max_itrs = 0; - FusionGroupContainerType & container = fuser_->getFusionGroupContainer(); - do - { - matchFusionGroups_(matches, insts, uids, container); - processMatches_(matches, insts, uids); - // Future feature whereIsEgon(insts,numGhosts); - ++max_itrs; - } while (matches.size() > 0 && max_itrs < fusion_match_max_tries_); + // Uop queue and fetch queue are both empty, nothing left to decode + break; + } + } - if (max_itrs >= fusion_match_max_tries_) - { - throw sparta::SpartaException("Fusion group match watch dog exceeded."); - } + if (fusion_enable_) + { + MatchInfoListType matches; + uint32_t max_itrs = 0; + FusionGroupContainerType & container = fuser_->getFusionGroupContainer(); + do + { + matchFusionGroups_(matches, insts, uids, container); + processMatches_(matches, insts, uids); + // Future feature whereIsEgon(insts,numGhosts); + ++max_itrs; + } while (matches.size() > 0 && max_itrs < fusion_match_max_tries_); + + if (max_itrs >= fusion_match_max_tries_) + { + throw sparta::SpartaException("Fusion group match watch dog exceeded."); } // Debug statement - if (fusion_debug_ && fusion_enable_) + if (fusion_debug_) + { infoInsts_(cout, insts); - // Send decoded instructions to rename - uop_queue_outp_.send(insts); + } + } - // TODO: whereisegon() would remove the ghosts, - // Commented out for now, in practice insts - // would be smaller due to the fused ops - // uint32_t unfusedInstsSize = insts->size(); + // Send decoded instructions to rename + sparta_assert(insts->size() <= num_to_decode_, + "Instruction group grew too large! " << insts); + uop_queue_outp_.send(insts); - // Decrement internal Uop Queue credits - sparta_assert(uop_queue_credits_ >= insts->size(), - "Attempt to decrement d0q credits below what is available"); + // TODO: whereisegon() would remove the ghosts, + // Commented out for now, in practice insts + // would be smaller due to the fused ops + // uint32_t unfusedInstsSize = insts->size(); - uop_queue_credits_ -= insts->size(); + // Decrement internal Uop Queue credits + sparta_assert(uop_queue_credits_ >= insts->size(), + "Attempt to decrement d0q credits below what is available"); + uop_queue_credits_ -= insts->size(); - // Send credits back to Fetch to get more instructions - fetch_queue_credits_outp_.send(insts->size()); - } - else - { - if (waiting_on_vset_) - { - ILOG("Waiting on vset that has register dependency") - } - } + // Send credits back to Fetch to get more instructions + fetch_queue_credits_outp_.send(insts->size()); // If we still have credits to send instructions as well as // instructions in the queue, schedule another decode session diff --git a/core/Decode.hpp b/core/Decode.hpp index e60ffd92..4e78bda6 100644 --- a/core/Decode.hpp +++ b/core/Decode.hpp @@ -28,7 +28,7 @@ namespace olympia { - + class VectorUopGenerator; /** * @file Decode.h * @brief Decode instructions from Fetch and send them on @@ -153,6 +153,9 @@ namespace olympia InstQueue fetch_queue_; InstQueue uop_queue_; + // Vector uop generator + VectorUopGenerator * vec_uop_gen_ = nullptr; + // Port listening to the fetch queue appends - Note the 1 cycle delay sparta::DataInPort fetch_queue_write_in_{&unit_port_set_, "in_fetch_queue_write", 1}; @@ -331,7 +334,7 @@ namespace olympia const std::vector fusion_group_definitions_; Inst::VCSRs VCSRs_; - + MavisType* mavis_facade_; uint32_t mavis_vsetvl_uid_; @@ -339,6 +342,10 @@ namespace olympia uint32_t mavis_vsetvli_uid_; bool waiting_on_vset_; + + // Helper method to update VCSRs + void updateVcsrs_(const InstPtr &); + ////////////////////////////////////////////////////////////////////// // Decoder callbacks void sendInitialCredits_(); diff --git a/core/Inst.hpp b/core/Inst.hpp index 166ec65f..41cfd651 100644 --- a/core/Inst.hpp +++ b/core/Inst.hpp @@ -79,14 +79,16 @@ namespace olympia // Vector CSRs struct VCSRs { + uint32_t vl = 16; // vector length uint32_t sew = 8; // set element width uint32_t lmul = 1; // effective length - uint32_t vl = 128; - bool vta = false; // vector tail agnostic, false = undisturbed, true = agnostic + bool vta = false; // vector tail agnostic, false = undisturbed, true = agnostic uint32_t vlmax_formula() { return (VLEN / sew) * lmul; } - void setVCSRs(uint32_t input_vl, uint32_t input_sew, uint32_t input_lmul, + void setVCSRs(uint32_t input_vl, + uint32_t input_sew, + uint32_t input_lmul, uint32_t input_vta) { vl = input_vl; @@ -247,10 +249,18 @@ namespace olympia void setTargetVAddr(sparta::memory::addr_t target_vaddr) { target_vaddr_ = target_vaddr; } // Set lmul from vset (vsetivli, vsetvli) - void setLMUL(uint32_t lmul) { VCSRs_.lmul = lmul; } + void setLMUL(uint32_t lmul) + { + VCSRs_.lmul = lmul; + VCSRs_.vlmax = VCSRs_.vlmax_formula(); + } // Set sew from vset (vsetivli, vsetvli) - void setSEW(uint32_t sew) { VCSRs_.sew = sew; } + void setSEW(uint32_t sew) + { + VCSRs_.sew = sew; + VCSRs_.vlmax = VCSRs_.vlmax_formula(); + } // Set VL from vset (vsetivli, vsetvli) void setVL(uint32_t vl) { VCSRs_.vl = vl; } @@ -262,11 +272,13 @@ namespace olympia void setTail(bool has_tail) { has_tail_ = has_tail; } - void setVCSRs(const VCSRs & inputVCSRs) + void setVCSRs(const VCSRs * inputVCSRs) { - VCSRs_.setVCSRs(inputVCSRs.vl, inputVCSRs.sew, inputVCSRs.lmul, inputVCSRs.vta); + VCSRs_.setVCSRs(inputVCSRs->vl, inputVCSRs->sew, inputVCSRs->lmul, inputVCSRs->vta); } + const VCSRs * getVCSRs() const { return &VCSRs_; } + void setUOpParent(sparta::SpartaWeakPointer & uop_parent) { uop_parent_ = uop_parent; @@ -324,6 +336,24 @@ namespace olympia const OpInfoList & getDestOpInfoList() const { return opcode_info_->getDestOpInfoList(); } + bool hasZeroRegSource() const + { + return std::any_of(getSourceOpInfoList().begin(), getSourceOpInfoList().end(), + [](const mavis::OperandInfo::Element & elem) + { + return elem.field_value == 0; + }); + } + + bool hasZeroRegDest() const + { + return std::any_of(getDestOpInfoList().begin(), getDestOpInfoList().end(), + [](const mavis::OperandInfo::Element & elem) + { + return elem.field_value == 0; + }); + } + // Static instruction information bool isStoreInst() const { return is_store_; } diff --git a/core/IssueQueue.cpp b/core/IssueQueue.cpp index 13d2cdff..91345b2a 100644 --- a/core/IssueQueue.cpp +++ b/core/IssueQueue.cpp @@ -97,70 +97,43 @@ namespace olympia void IssueQueue::handleOperandIssueCheck_(const InstPtr & ex_inst) { - // FIXME: Now every source operand should be ready - auto reg_file = core_types::RegFile::RF_INTEGER; const auto srcs = ex_inst->getRenameData().getSourceList(); - - if(srcs.size() > 1 && srcs[0].rf != srcs[1].rf && ex_inst->isVector()){ - // we have a vector-scalar operation, 1 vector src and 1 scalar src - // need to check both - uint32_t ready = 0; - for(auto src: srcs){ - reg_file = src.rf; - const auto & src_bits = ex_inst->getSrcRegisterBitMask(reg_file); - if (scoreboard_views_[reg_file]->isSet(src_bits)) - { - ready++; - } - else - { - // temporary fix for clearCallbacks not working - scoreboard_views_[reg_file]->registerReadyCallback( - src_bits, ex_inst->getUniqueID(), - [this, ex_inst](const sparta::Scoreboard::RegisterBitMask &) - { this->handleOperandIssueCheck_(ex_inst); }); - ILOG("Instruction NOT ready: " << ex_inst - << " Bits needed:" << sparta::printBitSet(src_bits) << " rf: " << reg_file); - // we break to prevent multiple callbacks from being sent out - break; - } - } - // we wait till the final callback comes back and checks in the case where both RF are ready at the same time - if(ready == srcs.size()){ - // all register file types are ready - ILOG("Sending to issue queue " << ex_inst); - // will insert based on if in_order_issue_ is set - // if it is, will be first in first out, if not it'll be by age, so by UniqueID (UID) - ready_queue_.insert(ex_inst); - ev_issue_ready_inst_.schedule(sparta::Clock::Cycle(0)); - } - } - else{ - if (srcs.size() > 0) - { - reg_file = srcs[0].rf; - } + uint32_t ready = 0; + for(const auto & src : srcs) + { + // vector-scalar operations have 1 vector src and 1 scalar src that + // need to be checked, so can't assume the register files are the + // same for every source + auto reg_file = src.rf; const auto & src_bits = ex_inst->getSrcRegisterBitMask(reg_file); if (scoreboard_views_[reg_file]->isSet(src_bits)) { - // Insert at the end if we are doing in order issue or if the scheduler is - // empty - ILOG("Sending to issue queue " << ex_inst); - // will insert based on if in_order_issue_ is set - // if it is, will be first in first out, if not it'll be by age, so by UniqueID (UID) - ready_queue_.insert(ex_inst); - ev_issue_ready_inst_.schedule(sparta::Clock::Cycle(0)); + ready++; } else { - scoreboard_views_[reg_file]->registerReadyCallback( - src_bits, ex_inst->getUniqueID(), - [this, ex_inst](const sparta::Scoreboard::RegisterBitMask &) - { this->handleOperandIssueCheck_(ex_inst); }); + // temporary fix for clearCallbacks not working + scoreboard_views_[reg_file]->registerReadyCallback(src_bits, ex_inst->getUniqueID(), + [this, ex_inst](const sparta::Scoreboard::RegisterBitMask &) + { this->handleOperandIssueCheck_(ex_inst); }); ILOG("Instruction NOT ready: " << ex_inst - << " Bits needed:" << sparta::printBitSet(src_bits) << " rf: " << reg_file); + << " Bits needed:" << sparta::printBitSet(src_bits) + << " rf: " << reg_file); + // we break to prevent multiple callbacks from being sent out + break; } } + + // we wait till the final callback comes back and checks in the case where both RF are ready at the same time + if(ready == srcs.size()) + { + // all register file types are ready + ILOG("Sending to issue queue " << ex_inst); + // will insert based on if in_order_issue_ is set + // if it is, will be first in first out, if not it'll be by age, so by UniqueID (UID) + ready_queue_.insert(ex_inst); + ev_issue_ready_inst_.schedule(sparta::Clock::Cycle(0)); + } } void IssueQueue::readyExeUnit_(const uint32_t & readyExe) @@ -279,4 +252,4 @@ namespace olympia } sparta_assert(false, "Attempt to complete instruction no longer exiting in issue queue!"); } -} // namespace olympia \ No newline at end of file +} // namespace olympia diff --git a/core/ROB.cpp b/core/ROB.cpp index 8a1a7dd9..502db1a8 100644 --- a/core/ROB.cpp +++ b/core/ROB.cpp @@ -165,6 +165,7 @@ namespace olympia ILOG("retiring " << ex_inst); retire_event_.collect(*ex_inst_ptr); + last_inst_retired_ = ex_inst_ptr; // Use the program ID to verify that the program order has been maintained. sparta_assert(ex_inst.getProgramID() == expected_program_id_, diff --git a/core/ROB.hpp b/core/ROB.hpp index fcb7895d..8a0f9c3c 100644 --- a/core/ROB.hpp +++ b/core/ROB.hpp @@ -118,6 +118,9 @@ namespace olympia // For correlation activities sparta::pevents::PeventCollector retire_event_{"RETIRE", getContainer(), getClock()}; + // Last inst retired for testing + InstPtr last_inst_retired_ = nullptr; + // A nice checker to make sure forward progress is being made // Note that in the ROB constructor, this event is set as non-continuing sparta::Clock::Cycle last_retirement_ = 0; // Last retirement cycle for checking stalled retire @@ -136,5 +139,9 @@ namespace olympia void retireSysInst_(InstPtr & ); + // Friend class used in retire testing + friend class ROBTester; }; + + class ROBTester; } diff --git a/core/Rename.cpp b/core/Rename.cpp index b4696d42..897b1cbd 100644 --- a/core/Rename.cpp +++ b/core/Rename.cpp @@ -205,18 +205,24 @@ namespace olympia sparta_assert(oldest_inst->getUniqueID() == inst_ptr->getUniqueID(), "ROB and rename inst_queue out of sync"); } + + inst_queue_.pop_front(); + + // pop all UOps from inst_queue_ to relaign ROB and rename inst_queue if (inst_ptr->hasUOps()) { - // pop all UOps from inst_queue_ to relaign ROB and rename inst_queue - for (uint32_t i = 0; i < inst_ptr->getLMUL(); i++) + while (inst_queue_.empty() == false) { - inst_queue_.pop_front(); + if (inst_ptr->getUOpID() == inst_queue_.front()->getUOpID()) + { + inst_queue_.pop_front(); + } + else + { + break; + } } } - else - { - inst_queue_.pop_front(); - } } else { @@ -567,4 +573,4 @@ namespace olympia ev_schedule_rename_.schedule(1); } } -} // namespace olympia \ No newline at end of file +} // namespace olympia diff --git a/core/VectorUopGenerator.cpp b/core/VectorUopGenerator.cpp new file mode 100644 index 00000000..d9237d76 --- /dev/null +++ b/core/VectorUopGenerator.cpp @@ -0,0 +1,99 @@ +#include "VectorUopGenerator.hpp" +#include "mavis/Mavis.h" +#include "sparta/utils/LogUtils.hpp" + +namespace olympia +{ + constexpr char VectorUopGenerator::name[]; + + VectorUopGenerator::VectorUopGenerator(sparta::TreeNode* node, const VectorUopGeneratorParameterSet* p) : + sparta::Unit(node) + {} + + void VectorUopGenerator::setInst(const InstPtr & inst) + { + sparta_assert(current_inst_ == nullptr, + "Cannot start generating uops for a new vector instruction, " + "current instruction has not finished: " << current_inst_); + + // Number of vector elements processed by each uop + const Inst::VCSRs * current_VCSRs = inst->getVCSRs(); + const uint64_t num_elems_per_uop = Inst::VLEN / current_VCSRs->sew; + // TODO: For now, generate uops for all elements even if there is a tail + num_uops_to_generate_ = std::ceil(current_VCSRs->vlmax / num_elems_per_uop); + + // Does the instruction have tail elements? + const uint32_t num_elems = current_VCSRs->vl / current_VCSRs->sew; + inst->setTail(num_elems < current_VCSRs->vlmax); + + if(num_uops_to_generate_ > 1) + { + current_inst_ = inst; + current_inst_->setUOpCount(num_uops_to_generate_); + ILOG("Inst: " << current_inst_ << " is being split into " << num_uops_to_generate_ << " UOPs"); + // Inst counts as the first uop + --num_uops_to_generate_; + } + else + { + ILOG("Inst: " << inst << " does not need to generate uops"); + } + } + + const InstPtr VectorUopGenerator::generateUop() + { + ++num_uops_generated_; + + // Increment source and destination register values + // TODO: Different generators will handle this differently + auto srcs = current_inst_->getSourceOpInfoList(); + for (auto & src : srcs) + { + src.field_value += num_uops_generated_; + } + auto dests = current_inst_->getDestOpInfoList(); + for (auto & dest : dests) + { + dest.field_value += num_uops_generated_; + } + + // Create uop + mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), + srcs, + dests, + current_inst_->getImmediate()); + InstPtr uop = mavis_facade_->makeInstDirectly(ex_info, getClock()); + + // setting UOp instructions to have the same UID and PID as parent instruction + uop->setUniqueID(current_inst_->getUniqueID()); + uop->setProgramID(current_inst_->getProgramID()); + + const Inst::VCSRs * current_VCSRs = current_inst_->getVCSRs(); + uop->setVCSRs(current_VCSRs); + uop->setUOpID(num_uops_generated_); + + // Set weak pointer to parent vector instruction (first uop) + sparta::SpartaWeakPointer weak_ptr_inst = current_inst_; + uop->setUOpParent(weak_ptr_inst); + + // Handle last uop + if(num_uops_generated_ == num_uops_to_generate_) + { + const uint32_t num_elems = current_VCSRs->vl / current_VCSRs->sew; + uop->setTail(num_elems < current_VCSRs->vlmax); + + reset_(); + } + + ILOG("Generated uop: " << uop); + return uop; + } + + void VectorUopGenerator::handleFlush(const FlushManager::FlushingCriteria & flush_criteria) + { + if(current_inst_ && flush_criteria.includedInFlush(current_inst_)) + { + reset_(); + } + } +} // namespace olympia diff --git a/core/VectorUopGenerator.hpp b/core/VectorUopGenerator.hpp new file mode 100644 index 00000000..dbd5ce98 --- /dev/null +++ b/core/VectorUopGenerator.hpp @@ -0,0 +1,67 @@ +// -*- C++ -*- +//! \file VectorUopGenerator.hpp +#pragma once + +#include "sparta/simulation/Unit.hpp" +#include "sparta/simulation/TreeNode.hpp" +#include "sparta/simulation/ParameterSet.hpp" + +#include "Inst.hpp" +#include "FlushManager.hpp" +#include "MavisUnit.hpp" + +namespace olympia +{ + + /** + * @file VectorUopGenerator.hpp + * @brief TODO + */ + class VectorUopGenerator : public sparta::Unit + { + public: + //! \brief Parameters for VectorUopGenerator model + class VectorUopGeneratorParameterSet : public sparta::ParameterSet + { + public: + VectorUopGeneratorParameterSet(sparta::TreeNode* n) : sparta::ParameterSet(n) {} + }; + + /** + * @brief Constructor for VectorUopGenerator + * + * @param node The node that represents (has a pointer to) the VectorUopGenerator + * @param p The VectorUopGenerator's parameter set + */ + VectorUopGenerator(sparta::TreeNode* node, const VectorUopGeneratorParameterSet* p); + + //! \brief Name of this resource. Required by sparta::UnitFactory + static constexpr char name[] = "vec_uop_gen"; + + void setMavis(MavisType * mavis) { mavis_facade_ = mavis; } + + void setInst(const InstPtr & inst); + + const InstPtr generateUop(); + + uint64_t getNumUopsRemaining() const { return num_uops_to_generate_; } + + void handleFlush(const FlushManager::FlushingCriteria &); + + private: + MavisType * mavis_facade_; + + // TODO: Use Sparta ValidValue + InstPtr current_inst_ = nullptr; + + uint64_t num_uops_generated_ = 0; + uint64_t num_uops_to_generate_ = 0; + + void reset_() + { + current_inst_ = nullptr; + num_uops_generated_ = 0; + num_uops_to_generate_ = 0; + } + }; +} // namespace olympia diff --git a/test/core/vector/Vector_test.cpp b/test/core/vector/Vector_test.cpp index 0fe23183..09f893d8 100644 --- a/test/core/vector/Vector_test.cpp +++ b/test/core/vector/Vector_test.cpp @@ -66,7 +66,7 @@ class olympia::DecodeTester { void test_VCSRs_sew_32(olympia::Decode &decode) { // test VCSRs EXPECT_TRUE(decode.VCSRs_.lmul == 1); - EXPECT_TRUE(decode.VCSRs_.vl == 512); + EXPECT_TRUE(decode.VCSRs_.vl == 32); EXPECT_TRUE(decode.VCSRs_.vta == 1); EXPECT_TRUE(decode.VCSRs_.sew == 32); } @@ -103,14 +103,18 @@ class olympia::IssueQueueTester { } }; -class olympia::RenameTester{ - public: - void test_hastail(olympia::Rename &rename){ - const auto & renaming_inst = rename.uop_queue_.read(0); - EXPECT_TRUE(renaming_inst->hasTail() == true); - } +class olympia::ROBTester +{ +public: + void test_hastail(olympia::ROB &rob) + { + sparta_assert(rob.last_inst_retired_ != nullptr, + "AHHHH"); + EXPECT_TRUE(rob.last_inst_retired_->hasTail() == true); + } }; -void runIQTest(int argc, char **argv) { + +void runTests(int argc, char **argv) { DEFAULTS.auto_summary_default = "off"; std::vector datafiles; std::string input_file; @@ -133,18 +137,16 @@ void runIQTest(int argc, char **argv) { "Enable the experimental vector pipelines"); po::positional_options_description &pos_opts = cls.getPositionalOptions(); - pos_opts.add("output_file", - -1); // example, look for the at the end + pos_opts.add("output_file", -1); // example, look for the at the end int err_code = 0; if (!cls.parse(argc, argv, err_code)) { - sparta_assert( - false, - "Command line parsing failed"); // Any errors already printed to cerr + sparta_assert(false, + "Command line parsing failed"); // Any errors already printed to cerr } sparta_assert(false == datafiles.empty(), - "Need an output file as the last argument of the test"); + "Need an output file as the last argument of the test"); uint64_t ilimit = 0; uint32_t num_cores = 1; @@ -234,13 +236,13 @@ void runIQTest(int argc, char **argv) { issue_queue_tester.no_inst_issued(*my_issuequeue); } else if(input_file.find("undisturbed_checking.json") != std::string::npos){ - cls.runSimulator(&sim, 9); + cls.runSimulator(&sim, 150); - // Test Rename - olympia::Rename *my_rename = \ - root_node->getChild("cpu.core0.rename")->getResourceAs(); - olympia::RenameTester rename_tester; - rename_tester.test_hastail(*my_rename); + // Test Retire + olympia::ROB *my_rob = \ + root_node->getChild("cpu.core0.rob")->getResourceAs(); + olympia::ROBTester rob_tester; + rob_tester.test_hastail(*my_rob); } else if(input_file.find("vrgather.json") != std::string::npos) { @@ -256,7 +258,7 @@ void runIQTest(int argc, char **argv) { } int main(int argc, char **argv) { - runIQTest(argc, argv); + runTests(argc, argv); REPORT_ERROR; return (int)ERROR_CODE; diff --git a/test/core/vector/undisturbed_checking.json b/test/core/vector/undisturbed_checking.json index e563db16..6754a91d 100644 --- a/test/core/vector/undisturbed_checking.json +++ b/test/core/vector/undisturbed_checking.json @@ -4,7 +4,7 @@ "rs1": 5, "rd": 1, "vtype": "0x3", - "vl": 128 + "vl": 896 }, { "mnemonic": "vadd.vv", @@ -12,4 +12,4 @@ "vs2": 17, "vd": 3 } -] \ No newline at end of file +] diff --git a/test/core/vector/vsetivli_vadd_lmul_4.json b/test/core/vector/vsetivli_vadd_lmul_4.json index 006fc2c3..dde1a6ba 100644 --- a/test/core/vector/vsetivli_vadd_lmul_4.json +++ b/test/core/vector/vsetivli_vadd_lmul_4.json @@ -4,7 +4,7 @@ "rs1": 5, "rd": 1, "vtype": "0x2", - "vl": 1024, + "vl": 512, "vta": 0 }, { @@ -13,4 +13,4 @@ "vs2": 17, "vd": 3 } -] \ No newline at end of file +] diff --git a/test/core/vector/vsetvli_vadd_sew_32.json b/test/core/vector/vsetvli_vadd_sew_32.json index 5b7ff8bf..14749801 100644 --- a/test/core/vector/vsetvli_vadd_sew_32.json +++ b/test/core/vector/vsetvli_vadd_sew_32.json @@ -4,7 +4,7 @@ "rs1": 2, "vtype": "0x10", "rd": 1, - "vl": 512, + "vl": 32, "vta": 1 }, { @@ -13,4 +13,4 @@ "vs2": 17, "vd": 3 } -] \ No newline at end of file +]