diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 0b2daaac..32743498 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -5,6 +5,7 @@ add_library(core
   SimpleBranchPred.cpp
   Fetch.cpp
   Decode.cpp
+  VectorUopGenerator.cpp
   Rename.cpp
   Dispatch.cpp
   Dispatcher.cpp
diff --git a/core/CPUFactories.hpp b/core/CPUFactories.hpp
index bf0799d5..1a875ea9 100644
--- a/core/CPUFactories.hpp
+++ b/core/CPUFactories.hpp
@@ -7,6 +7,7 @@
 #include "Core.hpp"
 #include "Fetch.hpp"
 #include "Decode.hpp"
+#include "VectorUopGenerator.hpp"
 #include "Rename.hpp"
 #include "Dispatch.hpp"
 #include "Execute.hpp"
@@ -47,6 +48,10 @@ namespace olympia{
         sparta::ResourceFactory<olympia::Decode,
                                 olympia::Decode::DecodeParameterSet> decode_rf;
 
+        //! \brief Resource Factory to build a VectorUopGenerator
+        sparta::ResourceFactory<olympia::VectorUopGenerator,
+                                olympia::VectorUopGenerator::VectorUopGeneratorParameterSet> vec_uop_gen_rf;
+
         //! \brief Resource Factory to build a Rename Unit
         RenameFactory rename_rf;
 
@@ -56,7 +61,6 @@ namespace olympia{
         //! \brief Resource Factory to build a Execute Unit
         ExecuteFactory  execute_rf;
 
-
         //! \brief Resource Factory to build a MMU Unit
         sparta::ResourceFactory<olympia::DCache,
                 olympia::DCache::CacheParameterSet> dcache_rf;
diff --git a/core/CPUTopology.cpp b/core/CPUTopology.cpp
index 7e08e396..d8fdb0a1 100644
--- a/core/CPUTopology.cpp
+++ b/core/CPUTopology.cpp
@@ -43,6 +43,14 @@ olympia::CoreTopologySimple::CoreTopologySimple(){
             sparta::TreeNode::GROUP_IDX_NONE,
             &factories->decode_rf
         },
+        {
+            "vec_uop_gen",
+            "cpu.core*.decode",
+            "Vector Uop Generator",
+            sparta::TreeNode::GROUP_NAME_NONE,
+            sparta::TreeNode::GROUP_IDX_NONE,
+            &factories->vec_uop_gen_rf
+        },
         {
             "rename",
             "cpu.core*",
diff --git a/core/Decode.cpp b/core/Decode.cpp
index acd6b692..7186aa13 100644
--- a/core/Decode.cpp
+++ b/core/Decode.cpp
@@ -1,11 +1,11 @@
 // <Decode.cpp> -*- C++ -*-
 
 #include "Decode.hpp"
+#include "VectorUopGenerator.hpp"
 #include "fusion/FusionTypes.hpp"
 
 #include "sparta/events/StartupEvent.hpp"
 #include "sparta/utils/LogUtils.hpp"
-#include "MavisUnit.hpp"
 #include <algorithm>
 #include <iostream>
 
@@ -78,6 +78,12 @@ namespace olympia
         mavis_vsetvl_uid_ = mavis_facade_->lookupInstructionUniqueID("vsetvl");
         mavis_vsetivli_uid_ = mavis_facade_->lookupInstructionUniqueID("vsetivli");
         mavis_vsetvli_uid_ = mavis_facade_->lookupInstructionUniqueID("vsetvli");
+
+        // Get pointer to the vector uop generator
+        sparta::TreeNode * root_node = getContainer()->getRoot();
+        vec_uop_gen_ = \
+            root_node->getChild("cpu.core0.decode.vec_uop_gen")->getResourceAs<olympia::VectorUopGenerator*>();
+        vec_uop_gen_->setMavis(mavis_facade_);
     }
 
     // -------------------------------------------------------------------
@@ -125,19 +131,38 @@ namespace olympia
         }
     }
 
-    // process vset settings being forward from execution pipe
-    // for set instructions that depend on register
-    void Decode::process_vset_(const InstPtr & inst)
+    void Decode::updateVcsrs_(const InstPtr & inst)
     {
         VCSRs_.setVCSRs(inst->getVL(), inst->getSEW(), inst->getLMUL(), inst->getVTA());
-        // AVL setting to VLMAX if rs1 = 0 and rd != 0
-        if (inst->getSourceOpInfoList()[0].field_value == 0
-            && inst->getDestOpInfoList()[0].field_value != 0)
+
+        const uint64_t uid = inst->getOpCodeInfo()->getInstructionUniqueID();
+        if ((uid == mavis_vsetvli_uid_) && inst->hasZeroRegSource())
         {
-            // set vl to vlmax, no need to block, vsetvli when rs1 is 0
-            // so we set VL to 0 on setVCSRs_()
-            VCSRs_.vl = VCSRs_.vlmax;
+            // If rs1 is x0 and rd is x0 then the vl is unchanged (assuming it is legal)
+            VCSRs_.vl = inst->hasZeroRegDest() ? std::min(VCSRs_.vl, VCSRs_.vlmax)
+                                               : VCSRs_.vlmax;
         }
+
+        ILOG("Processing vset{i}vl{i} instruction: " << inst);
+        ILOG("  LMUL: " << VCSRs_.lmul);
+        ILOG("   SEW: " << VCSRs_.sew);
+        ILOG("   VTA: " << VCSRs_.vta);
+        ILOG(" VLMAX: " << VCSRs_.vlmax);
+        ILOG("    VL: " << VCSRs_.vl);
+
+        // Check validity of vector config
+        sparta_assert(VCSRs_.lmul <= 8,
+            "LMUL (" << VCSRs_.lmul << ") cannot be greater than " << 8);
+        sparta_assert(VCSRs_.vl <= VCSRs_.vlmax,
+            "VL (" << VCSRs_.vl << ") cannot be greater than VLMAX ("<< VCSRs_.vlmax << ")");
+    }
+
+    // Process vset settings forwarded from the execution pipe
+    // for vset instructions that depend on a register value
+    void Decode::process_vset_(const InstPtr & inst)
+    {
+        updateVcsrs_(inst);
+
         // if rs1 != 0, VL = x[rs1], so we assume there's an STF field for VL
         if (waiting_on_vset_)
         {
@@ -153,19 +178,22 @@ namespace olympia
         ILOG("Got a flush call for " << criteria);
         fetch_queue_credits_outp_.send(fetch_queue_.size());
         fetch_queue_.clear();
+
+        // Reset the vector uop generator
+        vec_uop_gen_->handleFlush(criteria);
     }
 
     // Decode instructions
     void Decode::decodeInsts_()
     {
-        uint32_t num_decode = std::min(uop_queue_credits_, fetch_queue_.size() + uop_queue_.size());
-        num_decode = std::min(num_decode, num_to_decode_);
+        uint32_t num_to_decode = std::min(uop_queue_credits_, fetch_queue_.size() + uop_queue_.size());
+        num_to_decode = std::min(num_to_decode, num_to_decode_);
 
         // buffer to maximize the chances of a group match limited
         // by max allowed latency, bounded by max group size
         if (fusion_enable_)
         {
-            if (num_decode < fusion_max_group_size_ && latency_count_ < fusion_max_latency_)
+            if (num_to_decode < fusion_max_group_size_ && latency_count_ < fusion_max_latency_)
             {
                 ++latency_count_;
                 return;
@@ -174,194 +202,149 @@ namespace olympia
 
         latency_count_ = 0;
 
-        if (num_decode > 0 && !waiting_on_vset_)
-        {
-            InstGroupPtr insts =
-                sparta::allocate_sparta_shared_pointer<InstGroup>(instgroup_allocator);
+        // For fusion
+        InstUidListType uids;
 
-            InstUidListType uids;
-            // Send instructions on their way to rename
-            for (uint32_t i = 0; i < num_decode; ++i)
+        // Send instructions on their way to rename
+        InstGroupPtr insts =
+            sparta::allocate_sparta_shared_pointer<InstGroup>(instgroup_allocator);
+        // If we are waiting on a vset that is followed by more instructions,
+        // we decode the vset and stall everything else
+        while ((insts->size() < num_to_decode) && !waiting_on_vset_)
+        {
+            if (uop_queue_.size() > 0)
             {
-                if (uop_queue_.size() > 0)
+                const auto & inst = uop_queue_.read(0);
+                insts->emplace_back(inst);
+                inst->setStatus(Inst::Status::DECODED);
+                ILOG("From UOp Queue Decoded: " << inst);
+                uop_queue_.pop();
+            }
+            else if (fetch_queue_.size() > 0)
+            {
+                sparta_assert(fetch_queue_.size() > 0,
+                     "Cannot read from the fetch queue because it is empty!");
+                auto & inst = fetch_queue_.read(0);
+
+                // For vector instructions, we block on vset and do not allow any other
+                // processing of instructions until the vset is resolved. Optimizations could be
+                // to allow scalar operations to move forward until a subsequent vector
+                // instruction is detected, or to do vset prediction.
+
+                // vsetvl always blocks
+                // vsetvli only blocks if rs1 is not x0
+                // vsetivli never blocks
+                const uint64_t uid = inst->getOpCodeInfo()->getInstructionUniqueID();
+                if ((uid == mavis_vsetivli_uid_) ||
+                   ((uid == mavis_vsetvli_uid_) && inst->hasZeroRegSource()))
+                {
+                    updateVcsrs_(inst);
+                }
+                else if (uid == mavis_vsetvli_uid_ || uid == mavis_vsetvl_uid_)
                 {
-                    const auto & inst = uop_queue_.read(0);
-                    insts->emplace_back(inst);
-                    inst->setStatus(Inst::Status::DECODED);
-                    ILOG("From UOp Queue Decoded: " << inst);
-                    uop_queue_.pop();
+                    // block for vsetvl or vsetvli when rs1 of vsetvli is NOT 0
+                    waiting_on_vset_ = true;
+                    // need to indicate we want a signal sent back at execute
+                    inst->setBlockingVSET(true);
+                    ILOG("Decode stalled, Waiting on vset that has register dependency: " << inst)
                 }
                 else
                 {
-                    auto & inst = fetch_queue_.read(0);
-
-                    // for vector instructions, we block on vset and do not allow any other
-                    // processing of instructions until the vset is resolved optimizations could be
-                    // to allow scalar operations to move forward until a subsequent vector
-                    // instruction is detected or do vset prediction
-
-                    // the only two vset instructions that block are vsetvl or vsetvli,
-                    // because both depend on register value
-                    if (inst->getOpCodeInfo()->getInstructionUniqueID() == mavis_vsetivli_uid_)
-                    {
-                        // vsetivli with immediates, we can set at decode and continue to process
-                        // instruction group, no vset stall
-                        VCSRs_.setVCSRs(inst->getVL(), inst->getSEW(), inst->getLMUL(),
-                                        inst->getVTA());
-                        ILOG("Setting vset from VSETIVLI, LMUL: " << VCSRs_.lmul << " SEW: " << VCSRs_.sew
-                                                       << " VTA: " << VCSRs_.vta
-                                                       << " VL: " << VCSRs_.vl);
-                    }
-                    else if (inst->getOpCodeInfo()->getInstructionUniqueID() == mavis_vsetvl_uid_
-                             || inst->getOpCodeInfo()->getInstructionUniqueID()
-                                    == mavis_vsetvli_uid_)
+                    if (!inst->isVset() && inst->isVector())
                     {
-                        // block for vsetvl or vsetvli when rs1 of vsetvli is NOT 0
-                        waiting_on_vset_ = true;
-                        // need to indicate we want a signal sent back at execute
-                        inst->setBlockingVSET(true);
-                        ILOG("Decode stall due to vset dependency: " << inst);
+                        // set LMUL, SEW, VL, VTA for any other vector instructions
+                        inst->setVCSRs(&VCSRs_);
                     }
-                    else
+                }
+
+                ILOG("Decoded: " << inst);
+                insts->emplace_back(inst);
+                inst->setStatus(Inst::Status::DECODED);
+
+                // Handle vector uop generation
+                if (inst->isVector() && !inst->isVset())
+                {
+                    // If LMUL > 1, fracture instruction into UOps
+                    ILOG("Vector uop gen: " << inst);
+                    vec_uop_gen_->setInst(inst);
+
+                    // Original instruction will act as the first UOp
+                    inst->setUOpID(0); // set UOpID()
+
+                    while(vec_uop_gen_->getNumUopsRemaining() > 1)
                     {
-                        if (!inst->isVset() && inst->isVector())
+                        const InstPtr uop = vec_uop_gen_->generateUop();
+                        if (insts->size() < num_to_decode_)
                         {
-                            // set LMUL, VSET, VL, VTA for any other vector instructions
-                            inst->setVCSRs(VCSRs_);
+                            insts->emplace_back(uop);
+                            uop->setStatus(Inst::Status::DECODED);
                         }
-                    }
-                    if (inst->getLMUL() > 1 && !inst->isVset() && inst->isVector())
-                    {
-                        // update num_decode based on UOp count as well
-                        num_decode =
-                            std::min(uop_queue_credits_,
-                                     fetch_queue_.size() + uop_queue_.size() + inst->getLMUL() - 1);
-                        num_decode = std::min(num_decode, num_to_decode_);
-                        // lmul > 1, fracture instruction into UOps
-                        inst->setUOpID(0); // set UOpID()
-                        ILOG("Inst: " << inst << " is being split into " << VCSRs_.lmul << " UOPs");
-
-                        insts->emplace_back(inst);
-                        inst->setStatus(Inst::Status::DECODED);
-                        inst->setUOpCount(VCSRs_.lmul);
-                        inst->setLMUL(1); // setting LMUL to 1 due to UOp fracture
-                        fetch_queue_.pop();
-                        for (uint32_t j = 1; j < VCSRs_.lmul; ++j)
+                        else
                         {
-                            i++; // increment decode count to account for UOps
-                            // we create lmul - 1 instructions, because the original instruction
-                            // will also be executed, so we start creating UOPs at vector
-                            // registers + 1 until LMUL
-                            const std::string mnemonic = inst->getMnemonic();
-                            auto srcs = inst->getSourceOpInfoList();
-                            for (auto & src : srcs)
-                            {
-                                src.field_value += j;
-                            }
-                            auto dests = inst->getDestOpInfoList();
-                            for (auto & dest : dests)
-                            {
-                                dest.field_value += j;
-                            }
-                            const auto imm = inst->getImmediate();
-                            mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests, imm);
-                            InstPtr new_inst = mavis_facade_->makeInstDirectly(ex_info, getClock());
-                            // setting UOp instructions to have the same UID and PID as parent
-                            // instruction
-                            new_inst->setUniqueID(inst->getUniqueID());
-                            new_inst->setProgramID(inst->getProgramID());
-                            InstPtr inst_uop_ptr(new Inst(*new_inst));
-                            inst_uop_ptr->setVCSRs(VCSRs_);
-                            inst_uop_ptr->setUOpID(j);
-                            inst_uop_ptr->setLMUL(1); // setting LMUL to 1 due to UOp fracture
-                            sparta::SpartaWeakPointer<olympia::Inst> weak_ptr_inst = inst;
-                            inst_uop_ptr->setUOpParent(weak_ptr_inst);
-                            if (i < num_decode)
-                            {
-                                inst_uop_ptr->setTail(VCSRs_.vl / VCSRs_.sew < std::max(
-                                                          Inst::VLEN / VCSRs_.sew, VCSRs_.vlmax));
-                                insts->emplace_back(inst_uop_ptr);
-                                inst_uop_ptr->setStatus(Inst::Status::DECODED);
-                            }
-                            else
-                            {
-                                ILOG("Not enough decode credits to process UOp, appending to "
-                                     "uop_queue_ "
-                                     << inst_uop_ptr);
-                                uop_queue_.push(inst_uop_ptr);
-                            }
+                            ILOG("Not enough decode credits to process UOp, "
+                                 "appending to the uop queue: " << uop);
+                            uop_queue_.push(uop);
                         }
                     }
-                    else
-                    {
-                        inst->setTail(VCSRs_.vl / VCSRs_.sew
-                                      < std::max(Inst::VLEN / VCSRs_.sew, VCSRs_.vlmax));
-                        insts->emplace_back(inst);
-                        inst->setStatus(Inst::Status::DECODED);
-
-                        if (fusion_enable_)
-                        {
-                            uids.push_back(inst->getMavisUid());
-                        }
-
-                        ILOG("Decoded: " << inst);
+                }
 
-                        fetch_queue_.pop();
-                        if (waiting_on_vset_)
-                        {
-                            // if we have a waiting on vset followed by more instructions, we decode
-                            // vset and stall anything else
-                            break;
-                        }
-                    }
+                if (fusion_enable_)
+                {
+                    uids.push_back(inst->getMavisUid());
                 }
+
+                // Remove from Fetch Queue
+                fetch_queue_.pop();
             }
-            if (fusion_enable_)
+            else
             {
-                MatchInfoListType matches;
-                uint32_t max_itrs = 0;
-                FusionGroupContainerType & container = fuser_->getFusionGroupContainer();
-                do
-                {
-                    matchFusionGroups_(matches, insts, uids, container);
-                    processMatches_(matches, insts, uids);
-                    // Future feature whereIsEgon(insts,numGhosts);
-                    ++max_itrs;
-                } while (matches.size() > 0 && max_itrs < fusion_match_max_tries_);
+                // Uop queue and fetch queue are both empty, nothing left to decode
+                break;
+            }
+        }
 
-                if (max_itrs >= fusion_match_max_tries_)
-                {
-                    throw sparta::SpartaException("Fusion group match watch dog exceeded.");
-                }
+        if (fusion_enable_)
+        {
+            MatchInfoListType matches;
+            uint32_t max_itrs = 0;
+            FusionGroupContainerType & container = fuser_->getFusionGroupContainer();
+            do
+            {
+                matchFusionGroups_(matches, insts, uids, container);
+                processMatches_(matches, insts, uids);
+                // Future feature whereIsEgon(insts,numGhosts);
+                ++max_itrs;
+            } while (matches.size() > 0 && max_itrs < fusion_match_max_tries_);
+
+            if (max_itrs >= fusion_match_max_tries_)
+            {
+                throw sparta::SpartaException("Fusion group match watch dog exceeded.");
             }
 
             // Debug statement
-            if (fusion_debug_ && fusion_enable_)
+            if (fusion_debug_)
+            {
                 infoInsts_(cout, insts);
-            // Send decoded instructions to rename
-            uop_queue_outp_.send(insts);
+            }
+        }
 
-            // TODO: whereisegon() would remove the ghosts,
-            // Commented out for now, in practice insts
-            // would be smaller due to the fused ops
-            // uint32_t unfusedInstsSize = insts->size();
+        // Send decoded instructions to rename
+        sparta_assert(insts->size() <= num_to_decode_,
+            "Instruction group grew too large! " << insts);
+        uop_queue_outp_.send(insts);
 
-            // Decrement internal Uop Queue credits
-            sparta_assert(uop_queue_credits_ >= insts->size(),
-                          "Attempt to decrement d0q credits below what is available");
+        // TODO: whereisegon() would remove the ghosts,
+        // Commented out for now, in practice insts
+        // would be smaller due to the fused ops
+        // uint32_t unfusedInstsSize = insts->size();
 
-            uop_queue_credits_ -= insts->size();
+        // Decrement internal Uop Queue credits
+        sparta_assert(uop_queue_credits_ >= insts->size(),
+            "Attempt to decrement d0q credits below what is available");
+        uop_queue_credits_ -= insts->size();
 
-            // Send credits back to Fetch to get more instructions
-            fetch_queue_credits_outp_.send(insts->size());
-        }
-        else
-        {
-            if (waiting_on_vset_)
-            {
-                ILOG("Waiting on vset that has register dependency")
-            }
-        }
+        // Send credits back to Fetch to get more instructions
+        fetch_queue_credits_outp_.send(insts->size());
 
         // If we still have credits to send instructions as well as
         // instructions in the queue, schedule another decode session
diff --git a/core/Decode.hpp b/core/Decode.hpp
index e60ffd92..4e78bda6 100644
--- a/core/Decode.hpp
+++ b/core/Decode.hpp
@@ -28,7 +28,7 @@
 
 namespace olympia
 {
-
+    class VectorUopGenerator;
     /**
      * @file   Decode.h
      * @brief Decode instructions from Fetch and send them on
@@ -153,6 +153,9 @@ namespace olympia
         InstQueue fetch_queue_;
         InstQueue uop_queue_;
 
+        // Vector uop generator
+        VectorUopGenerator * vec_uop_gen_ = nullptr;
+
         // Port listening to the fetch queue appends - Note the 1 cycle delay
         sparta::DataInPort<InstGroupPtr> fetch_queue_write_in_{&unit_port_set_,
                                                                "in_fetch_queue_write", 1};
@@ -331,7 +334,7 @@ namespace olympia
         const std::vector<std::string> fusion_group_definitions_;
 
         Inst::VCSRs VCSRs_;
-        
+
         MavisType* mavis_facade_;
 
         uint32_t mavis_vsetvl_uid_;
@@ -339,6 +342,10 @@ namespace olympia
         uint32_t mavis_vsetvli_uid_;
 
         bool waiting_on_vset_;
+
+        // Helper method to update VCSRs
+        void updateVcsrs_(const InstPtr &);
+
         //////////////////////////////////////////////////////////////////////
         // Decoder callbacks
         void sendInitialCredits_();
diff --git a/core/Inst.hpp b/core/Inst.hpp
index 166ec65f..41cfd651 100644
--- a/core/Inst.hpp
+++ b/core/Inst.hpp
@@ -79,14 +79,16 @@ namespace olympia
         // Vector CSRs
         struct VCSRs
         {
+            uint32_t vl = 16;  // vector length
             uint32_t sew = 8;  // set element width
             uint32_t lmul = 1; // effective length
-            uint32_t vl = 128;
-            bool vta = false; // vector tail agnostic, false = undisturbed, true = agnostic
+            bool vta = false;  // vector tail agnostic, false = undisturbed, true = agnostic
 
             uint32_t vlmax_formula() { return (VLEN / sew) * lmul; }
 
-            void setVCSRs(uint32_t input_vl, uint32_t input_sew, uint32_t input_lmul,
+            void setVCSRs(uint32_t input_vl,
+                          uint32_t input_sew,
+                          uint32_t input_lmul,
                           uint32_t input_vta)
             {
                 vl = input_vl;
@@ -247,10 +249,18 @@ namespace olympia
         void setTargetVAddr(sparta::memory::addr_t target_vaddr) { target_vaddr_ = target_vaddr; }
 
         // Set lmul from vset (vsetivli, vsetvli)
-        void setLMUL(uint32_t lmul) { VCSRs_.lmul = lmul; }
+        void setLMUL(uint32_t lmul)
+        {
+            VCSRs_.lmul = lmul;
+            VCSRs_.vlmax = VCSRs_.vlmax_formula();
+        }
 
         // Set sew from vset (vsetivli, vsetvli)
-        void setSEW(uint32_t sew) { VCSRs_.sew = sew; }
+        void setSEW(uint32_t sew)
+        {
+            VCSRs_.sew = sew;
+            VCSRs_.vlmax = VCSRs_.vlmax_formula();
+        }
 
         // Set VL from vset (vsetivli, vsetvli)
         void setVL(uint32_t vl) { VCSRs_.vl = vl; }
@@ -262,11 +272,13 @@ namespace olympia
 
         void setTail(bool has_tail) { has_tail_ = has_tail; }
 
-        void setVCSRs(const VCSRs & inputVCSRs)
+        void setVCSRs(const VCSRs * inputVCSRs)
         {
-            VCSRs_.setVCSRs(inputVCSRs.vl, inputVCSRs.sew, inputVCSRs.lmul, inputVCSRs.vta);
+            VCSRs_.setVCSRs(inputVCSRs->vl, inputVCSRs->sew, inputVCSRs->lmul, inputVCSRs->vta);
         }
 
+        const VCSRs * getVCSRs() const { return &VCSRs_; }
+
         void setUOpParent(sparta::SpartaWeakPointer<olympia::Inst> & uop_parent)
         {
             uop_parent_ = uop_parent;
@@ -324,6 +336,24 @@ namespace olympia
 
         const OpInfoList & getDestOpInfoList() const { return opcode_info_->getDestOpInfoList(); }
 
+        bool hasZeroRegSource() const
+        {
+            return std::any_of(getSourceOpInfoList().begin(), getSourceOpInfoList().end(),
+                [](const mavis::OperandInfo::Element & elem)
+                {
+                    return elem.field_value == 0;
+                });
+        }
+
+        bool hasZeroRegDest() const
+        {
+            return std::any_of(getDestOpInfoList().begin(), getDestOpInfoList().end(),
+                [](const mavis::OperandInfo::Element & elem)
+                {
+                    return elem.field_value == 0;
+                });
+        }
+
         // Static instruction information
         bool isStoreInst() const { return is_store_; }
 
diff --git a/core/IssueQueue.cpp b/core/IssueQueue.cpp
index 13d2cdff..91345b2a 100644
--- a/core/IssueQueue.cpp
+++ b/core/IssueQueue.cpp
@@ -97,70 +97,43 @@ namespace olympia
 
     void IssueQueue::handleOperandIssueCheck_(const InstPtr & ex_inst)
     {
-        // FIXME: Now every source operand should be ready
-        auto reg_file = core_types::RegFile::RF_INTEGER;
         const auto srcs = ex_inst->getRenameData().getSourceList();
-        
-        if(srcs.size() > 1 && srcs[0].rf != srcs[1].rf && ex_inst->isVector()){
-            // we have a vector-scalar operation, 1 vector src and 1 scalar src
-            // need to check both
-            uint32_t ready = 0;
-            for(auto src: srcs){
-                reg_file = src.rf;
-                const auto & src_bits = ex_inst->getSrcRegisterBitMask(reg_file);
-                if (scoreboard_views_[reg_file]->isSet(src_bits))
-                {
-                    ready++;
-                }
-                else
-                {
-                    // temporary fix for clearCallbacks not working
-                    scoreboard_views_[reg_file]->registerReadyCallback(
-                    src_bits, ex_inst->getUniqueID(),
-                    [this, ex_inst](const sparta::Scoreboard::RegisterBitMask &)
-                    { this->handleOperandIssueCheck_(ex_inst); });
-                    ILOG("Instruction NOT ready: " << ex_inst
-                                                << " Bits needed:" << sparta::printBitSet(src_bits) << " rf: " << reg_file);
-                    // we break to prevent multiple callbacks from being sent out
-                    break;
-                }
-            }
-            // we wait till the final callback comes back and checks in the case where both RF are ready at the same time
-            if(ready == srcs.size()){
-                // all register file types are ready
-                ILOG("Sending to issue queue " << ex_inst);
-                // will insert based on if in_order_issue_ is set
-                // if it is, will be first in first out, if not it'll be by age, so by UniqueID (UID)
-                ready_queue_.insert(ex_inst);
-                ev_issue_ready_inst_.schedule(sparta::Clock::Cycle(0));
-            }
-        }
-        else{
-            if (srcs.size() > 0)
-            {
-                reg_file = srcs[0].rf;
-            }
+        uint32_t ready = 0;
+        for(const auto & src : srcs)
+        {
+            // vector-scalar operations have 1 vector src and 1 scalar src that
+            // need to be checked, so can't assume the register files are the
+            // same for every source
+            auto reg_file = src.rf;
             const auto & src_bits = ex_inst->getSrcRegisterBitMask(reg_file);
             if (scoreboard_views_[reg_file]->isSet(src_bits))
             {
-                // Insert at the end if we are doing in order issue or if the scheduler is
-                // empty
-                ILOG("Sending to issue queue " << ex_inst);
-                // will insert based on if in_order_issue_ is set
-                // if it is, will be first in first out, if not it'll be by age, so by UniqueID (UID)
-                ready_queue_.insert(ex_inst);
-                ev_issue_ready_inst_.schedule(sparta::Clock::Cycle(0));
+                ready++;
             }
             else
             {
-                scoreboard_views_[reg_file]->registerReadyCallback(
-                    src_bits, ex_inst->getUniqueID(),
-                    [this, ex_inst](const sparta::Scoreboard::RegisterBitMask &)
-                    { this->handleOperandIssueCheck_(ex_inst); });
+                // temporary fix for clearCallbacks not working
+                scoreboard_views_[reg_file]->registerReadyCallback(src_bits, ex_inst->getUniqueID(),
+                [this, ex_inst](const sparta::Scoreboard::RegisterBitMask &)
+                { this->handleOperandIssueCheck_(ex_inst); });
                 ILOG("Instruction NOT ready: " << ex_inst
-                                                << " Bits needed:" << sparta::printBitSet(src_bits) << " rf: " << reg_file);
+                                               << " Bits needed:" << sparta::printBitSet(src_bits)
+                                               << " rf: " << reg_file);
+                // we break to prevent multiple callbacks from being sent out
+                break;
             }
         }
+
+        // we wait till the final callback comes back and checks in the case where both RF are ready at the same time
+        if(ready == srcs.size())
+        {
+            // all register file types are ready
+            ILOG("Sending to issue queue " << ex_inst);
+            // will insert based on if in_order_issue_ is set
+            // if it is, will be first in first out, if not it'll be by age, so by UniqueID (UID)
+            ready_queue_.insert(ex_inst);
+            ev_issue_ready_inst_.schedule(sparta::Clock::Cycle(0));
+        }
     }
 
     void IssueQueue::readyExeUnit_(const uint32_t & readyExe)
@@ -279,4 +252,4 @@ namespace olympia
         }
         sparta_assert(false, "Attempt to complete instruction no longer exiting in issue queue!");
     }
-} // namespace olympia
\ No newline at end of file
+} // namespace olympia
diff --git a/core/ROB.cpp b/core/ROB.cpp
index 8a1a7dd9..502db1a8 100644
--- a/core/ROB.cpp
+++ b/core/ROB.cpp
@@ -165,6 +165,7 @@ namespace olympia
                 ILOG("retiring " << ex_inst);
 
                 retire_event_.collect(*ex_inst_ptr);
+                last_inst_retired_ = ex_inst_ptr;
 
                 // Use the program ID to verify that the program order has been maintained.
                 sparta_assert(ex_inst.getProgramID() == expected_program_id_,
diff --git a/core/ROB.hpp b/core/ROB.hpp
index fcb7895d..8a0f9c3c 100644
--- a/core/ROB.hpp
+++ b/core/ROB.hpp
@@ -118,6 +118,9 @@ namespace olympia
         // For correlation activities
         sparta::pevents::PeventCollector<InstPEventPairs> retire_event_{"RETIRE", getContainer(), getClock()};
 
+        // Last inst retired for testing
+        InstPtr last_inst_retired_ = nullptr;
+
         // A nice checker to make sure forward progress is being made
         // Note that in the ROB constructor, this event is set as non-continuing
         sparta::Clock::Cycle last_retirement_ = 0; // Last retirement cycle for checking stalled retire
@@ -136,5 +139,9 @@ namespace olympia
 
         void retireSysInst_(InstPtr & );
 
+        // Friend class used in retire testing
+        friend class ROBTester;
     };
+
+    class ROBTester;
 }
diff --git a/core/Rename.cpp b/core/Rename.cpp
index b4696d42..897b1cbd 100644
--- a/core/Rename.cpp
+++ b/core/Rename.cpp
@@ -205,18 +205,24 @@ namespace olympia
                 sparta_assert(oldest_inst->getUniqueID() == inst_ptr->getUniqueID(),
                               "ROB and rename inst_queue out of sync");
             }
+
+            inst_queue_.pop_front();
+
+            // pop all UOps from inst_queue_ to realign ROB and rename inst_queue
             if (inst_ptr->hasUOps())
             {
-                // pop all UOps from inst_queue_ to relaign ROB and rename inst_queue
-                for (uint32_t i = 0; i < inst_ptr->getLMUL(); i++)
+                while (inst_queue_.empty() == false)
                 {
-                    inst_queue_.pop_front();
+                    if (inst_ptr->getUOpID() == inst_queue_.front()->getUOpID())
+                    {
+                        inst_queue_.pop_front();
+                    }
+                    else
+                    {
+                        break;
+                    }
                 }
             }
-            else
-            {
-                inst_queue_.pop_front();
-            }
         }
         else
         {
@@ -567,4 +573,4 @@ namespace olympia
             ev_schedule_rename_.schedule(1);
         }
     }
-} // namespace olympia
\ No newline at end of file
+} // namespace olympia
diff --git a/core/VectorUopGenerator.cpp b/core/VectorUopGenerator.cpp
new file mode 100644
index 00000000..d9237d76
--- /dev/null
+++ b/core/VectorUopGenerator.cpp
@@ -0,0 +1,99 @@
+#include "VectorUopGenerator.hpp"
+#include "mavis/Mavis.h"
+#include "sparta/utils/LogUtils.hpp"
+
+namespace olympia
+{
+    constexpr char VectorUopGenerator::name[]; // out-of-class definition of the static member declared in the header
+
+    VectorUopGenerator::VectorUopGenerator(sparta::TreeNode* node, const VectorUopGeneratorParameterSet* p) :
+        sparta::Unit(node) // only the tree node is forwarded; p is not read here
+    {}
+
+    // Latch a vector instruction for uop generation: derive the uop count from
+    // its VCSRs, flag whether it has a tail, and keep it only if it must be split.
+    void VectorUopGenerator::setInst(const InstPtr & inst)
+    {
+        sparta_assert(current_inst_ == nullptr,
+            "Cannot start generating uops for a new vector instruction, "
+            "current instruction has not finished: " << current_inst_);
+        // Number of vector elements processed by each uop
+        const Inst::VCSRs * current_VCSRs = inst->getVCSRs();
+        const uint64_t num_elems_per_uop = Inst::VLEN / current_VCSRs->sew;
+        // TODO: For now, generate uops for all elements even if there is a tail
+        // Integer ceiling division (std::ceil() of an integer quotient would not round up)
+        num_uops_to_generate_ = (current_VCSRs->vlmax + num_elems_per_uop - 1) / num_elems_per_uop;
+
+        // Does the instruction have tail elements?  NOTE(review): assumes vl is in bits - confirm
+        const uint32_t num_elems = current_VCSRs->vl / current_VCSRs->sew;
+        inst->setTail(num_elems < current_VCSRs->vlmax);
+
+        if(num_uops_to_generate_ <= 1)
+        {
+            ILOG("Inst: " << inst << " does not need to generate uops");
+            num_uops_to_generate_ = 0; // nothing pending, so report no uops remaining
+            return;
+        }
+        current_inst_ = inst;
+        current_inst_->setUOpCount(num_uops_to_generate_);
+        ILOG("Inst: " << current_inst_ << " is being split into " << num_uops_to_generate_ << " UOPs");
+        --num_uops_to_generate_; // Inst counts as the first uop
+    }
+
+    // Create the next uop of the pending instruction: same mnemonic and immediate,
+    // every source/destination field_value offset by the uop index, same UID/PID
+    // and VCSRs. The generator state is reset after the last uop is produced.
+    const InstPtr VectorUopGenerator::generateUop()
+    {
+        sparta_assert(current_inst_ != nullptr, "Cannot generate a uop: no vector instruction is pending");
+        sparta_assert(mavis_facade_ != nullptr, "Cannot generate a uop: setMavis() has not been called");
+        ++num_uops_generated_;
+
+        // Increment source and destination register values
+        // TODO: Different generators will handle this differently
+        auto srcs = current_inst_->getSourceOpInfoList();
+        for (auto & src : srcs)
+        {
+            src.field_value += num_uops_generated_;
+        }
+        auto dests = current_inst_->getDestOpInfoList();
+        for (auto & dest : dests)
+        {
+            dest.field_value += num_uops_generated_;
+        }
+
+        // Create uop
+        mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), srcs, dests,
+                                                 current_inst_->getImmediate());
+        InstPtr uop = mavis_facade_->makeInstDirectly(ex_info, getClock());
+
+        // setting UOp instructions to have the same UID and PID as parent instruction
+        uop->setUniqueID(current_inst_->getUniqueID());
+        uop->setProgramID(current_inst_->getProgramID());
+        const Inst::VCSRs * current_VCSRs = current_inst_->getVCSRs();
+        uop->setVCSRs(current_VCSRs);
+        uop->setUOpID(num_uops_generated_);
+
+        // Set weak pointer to parent vector instruction (first uop)
+        sparta::SpartaWeakPointer<olympia::Inst> weak_ptr_inst = current_inst_;
+        uop->setUOpParent(weak_ptr_inst);
+
+        // Handle last uop: it carries the tail flag, then the generator is cleared
+        if(num_uops_generated_ == num_uops_to_generate_)
+        {
+            const uint32_t num_elems = current_VCSRs->vl / current_VCSRs->sew;
+            uop->setTail(num_elems < current_VCSRs->vlmax);
+            reset_();
+        }
+        ILOG("Generated uop: " << uop);
+        return uop;
+    }
+
+    // Abandon the instruction currently being split if the flush covers it.
+    void VectorUopGenerator::handleFlush(const FlushManager::FlushingCriteria & flush_criteria)
+    {
+        // Nothing in flight means there is no generator state to discard
+        if(current_inst_ == nullptr) { return; }
+        if(flush_criteria.includedInFlush(current_inst_)) { reset_(); }
+    }
+} // namespace olympia
diff --git a/core/VectorUopGenerator.hpp b/core/VectorUopGenerator.hpp
new file mode 100644
index 00000000..dbd5ce98
--- /dev/null
+++ b/core/VectorUopGenerator.hpp
@@ -0,0 +1,67 @@
+// <VectorUopGenerator.hpp> -*- C++ -*-
+//! \file VectorUopGenerator.hpp
+#pragma once
+
+#include "sparta/simulation/Unit.hpp"
+#include "sparta/simulation/TreeNode.hpp"
+#include "sparta/simulation/ParameterSet.hpp"
+
+#include "Inst.hpp"
+#include "FlushManager.hpp"
+#include "MavisUnit.hpp"
+
+namespace olympia
+{
+
+    /**
+     * @class VectorUopGenerator
+     * @brief Splits one vector instruction at a time into uops, one per generateUop() call
+     */
+    class VectorUopGenerator : public sparta::Unit
+    {
+    public:
+        //! \brief Parameters for VectorUopGenerator model
+        class VectorUopGeneratorParameterSet : public sparta::ParameterSet
+        {
+          public:
+            VectorUopGeneratorParameterSet(sparta::TreeNode* n) : sparta::ParameterSet(n) {}
+        };
+
+        /**
+         * @brief Constructor for VectorUopGenerator
+         *
+         * @param node The node that represents (has a pointer to) the VectorUopGenerator
+         * @param p The VectorUopGenerator's parameter set
+         */
+        VectorUopGenerator(sparta::TreeNode* node, const VectorUopGeneratorParameterSet* p);
+
+        //! \brief Name of this resource. Required by sparta::UnitFactory
+        static constexpr char name[] = "vec_uop_gen";
+        //! \brief Provide the Mavis facade that generateUop() uses to create uops
+        void setMavis(MavisType * mavis) { mavis_facade_ = mavis; }
+        //! \brief Begin uop generation for a vector instruction; asserts none is in progress
+        void setInst(const InstPtr & inst);
+        //! \brief Create and return the next uop of the instruction given to setInst()
+        const InstPtr generateUop();
+        //! \brief Pending-uop total latched by setInst(); zeroed when the generator is reset
+        uint64_t getNumUopsRemaining() const { return num_uops_to_generate_; }
+        //! \brief Discard the current instruction if it is included in the flush
+        void handleFlush(const FlushManager::FlushingCriteria &);
+
+    private:
+        MavisType * mavis_facade_ = nullptr; // set via setMavis(); never freed by this class
+
+        // TODO: Use Sparta ValidValue
+        InstPtr current_inst_ = nullptr;
+
+        uint64_t num_uops_generated_ = 0;
+        uint64_t num_uops_to_generate_ = 0;
+
+        void reset_()
+        {
+            current_inst_ = nullptr;
+            num_uops_generated_ = 0;
+            num_uops_to_generate_ = 0;
+        }
+    };
+} // namespace olympia
diff --git a/test/core/vector/Vector_test.cpp b/test/core/vector/Vector_test.cpp
index 0fe23183..09f893d8 100644
--- a/test/core/vector/Vector_test.cpp
+++ b/test/core/vector/Vector_test.cpp
@@ -66,7 +66,7 @@ class olympia::DecodeTester {
     void test_VCSRs_sew_32(olympia::Decode &decode) {
         // test VCSRs
         EXPECT_TRUE(decode.VCSRs_.lmul == 1);
-        EXPECT_TRUE(decode.VCSRs_.vl == 512);
+        EXPECT_TRUE(decode.VCSRs_.vl == 32);
         EXPECT_TRUE(decode.VCSRs_.vta == 1);
         EXPECT_TRUE(decode.VCSRs_.sew == 32);
     }
@@ -103,14 +103,18 @@ class olympia::IssueQueueTester {
     }
 };
 
-class olympia::RenameTester{
-    public:
-        void test_hastail(olympia::Rename &rename){
-            const auto & renaming_inst = rename.uop_queue_.read(0);
-            EXPECT_TRUE(renaming_inst->hasTail() == true);
-        }
+// Test-only friend of olympia::ROB used to inspect its retirement state
+class olympia::ROBTester
+{
+public:
+    void test_hastail(olympia::ROB &rob) // the last retired instruction must have a tail
+    {
+        sparta_assert(rob.last_inst_retired_ != nullptr, "ROB has not retired any instruction");
+        EXPECT_TRUE(rob.last_inst_retired_->hasTail() == true);
+    }
 };
-void runIQTest(int argc, char **argv) {
+
+void runTests(int argc, char **argv) {
     DEFAULTS.auto_summary_default = "off";
     std::vector<std::string> datafiles;
     std::string input_file;
@@ -133,18 +137,16 @@ void runIQTest(int argc, char **argv) {
             "Enable the experimental vector pipelines");
 
     po::positional_options_description &pos_opts = cls.getPositionalOptions();
-    pos_opts.add("output_file",
-                             -1); // example, look for the <data file> at the end
+    pos_opts.add("output_file", -1); // example, look for the <data file> at the end
 
     int err_code = 0;
     if (!cls.parse(argc, argv, err_code)) {
-        sparta_assert(
-                false,
-                "Command line parsing failed"); // Any errors already printed to cerr
+        sparta_assert(false,
+            "Command line parsing failed"); // Any errors already printed to cerr
     }
 
     sparta_assert(false == datafiles.empty(),
-                                "Need an output file as the last argument of the test");
+        "Need an output file as the last argument of the test");
 
     uint64_t ilimit = 0;
     uint32_t num_cores = 1;
@@ -234,13 +236,13 @@ void runIQTest(int argc, char **argv) {
         issue_queue_tester.no_inst_issued(*my_issuequeue);
     }
     else if(input_file.find("undisturbed_checking.json") != std::string::npos){
-        cls.runSimulator(&sim, 9);
+        cls.runSimulator(&sim, 150);
 
-        // Test Rename
-        olympia::Rename *my_rename = \
-            root_node->getChild("cpu.core0.rename")->getResourceAs<olympia::Rename *>();
-        olympia::RenameTester rename_tester;
-        rename_tester.test_hastail(*my_rename);
+        // Test Retire
+        olympia::ROB *my_rob = \
+            root_node->getChild("cpu.core0.rob")->getResourceAs<olympia::ROB *>();
+        olympia::ROBTester rob_tester;
+        rob_tester.test_hastail(*my_rob);
     }
     else if(input_file.find("vrgather.json") != std::string::npos)
     {
@@ -256,7 +258,7 @@ void runIQTest(int argc, char **argv) {
 }
 
 int main(int argc, char **argv) {
-    runIQTest(argc, argv);
+    runTests(argc, argv);
 
     REPORT_ERROR;
     return (int)ERROR_CODE;
diff --git a/test/core/vector/undisturbed_checking.json b/test/core/vector/undisturbed_checking.json
index e563db16..6754a91d 100644
--- a/test/core/vector/undisturbed_checking.json
+++ b/test/core/vector/undisturbed_checking.json
@@ -4,7 +4,7 @@
         "rs1": 5,
         "rd": 1,
         "vtype": "0x3",
-        "vl": 128
+        "vl": 896
     },
     {
         "mnemonic": "vadd.vv",
@@ -12,4 +12,4 @@
         "vs2": 17,
         "vd": 3
     }
-]
\ No newline at end of file
+]
diff --git a/test/core/vector/vsetivli_vadd_lmul_4.json b/test/core/vector/vsetivli_vadd_lmul_4.json
index 006fc2c3..dde1a6ba 100644
--- a/test/core/vector/vsetivli_vadd_lmul_4.json
+++ b/test/core/vector/vsetivli_vadd_lmul_4.json
@@ -4,7 +4,7 @@
         "rs1": 5,
         "rd": 1,
         "vtype": "0x2",
-        "vl": 1024,
+        "vl": 512,
         "vta": 0
     },
     {
@@ -13,4 +13,4 @@
         "vs2": 17,
         "vd": 3
     }
-]
\ No newline at end of file
+]
diff --git a/test/core/vector/vsetvli_vadd_sew_32.json b/test/core/vector/vsetvli_vadd_sew_32.json
index 5b7ff8bf..14749801 100644
--- a/test/core/vector/vsetvli_vadd_sew_32.json
+++ b/test/core/vector/vsetvli_vadd_sew_32.json
@@ -4,7 +4,7 @@
         "rs1": 2,
         "vtype": "0x10",
         "rd": 1,
-        "vl": 512,
+        "vl": 32,
         "vta": 1
     },
     {
@@ -13,4 +13,4 @@
         "vs2": 17,
         "vd": 3
     }
-]
\ No newline at end of file
+]