Skip to content

Commit

Permalink
Non blocking cache implementation (#166)
Browse files Browse the repository at this point in the history
The implementation is based on the previous work by Vineeth.
However, I made a change to use a single pipeline and reduced the number of
events generated inside the DCache.
I have also removed the coalescing of the MSHR requests (based on the
block address); instead, each request generates an MSHR entry and the same
instruction is then sent to the next cache for lookup in case of a miss.

Micro architecture details
https://docs.google.com/document/d/1HLlCkfZUtt6BafgVypS5pwS90zo4XOrGFOR1KIzYHLw/edit?usp=sharing

---------

Signed-off-by: Suraj Shirvankar <[email protected]>
  • Loading branch information
h0lyalg0rithm authored Aug 2, 2024
1 parent a318f23 commit 2f6020c
Show file tree
Hide file tree
Showing 17 changed files with 910 additions and 63 deletions.
5 changes: 4 additions & 1 deletion core/CacheFuncModel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ namespace olympia
bool isValid() const { return valid_; }

// Required by SimpleCache2
void setModified(bool m) { (void) m; }
void setModified(bool m) { modified_ = m; }

bool isModified() const { return modified_; }

// Required by SimpleCache2
bool read(uint64_t offset, uint32_t size, uint32_t *buf) const
Expand All @@ -84,6 +86,7 @@ namespace olympia
private:
uint64_t line_size_ = 0;
bool valid_ = false;
bool modified_ = false;

}; // class SimpleCacheLine

Expand Down
292 changes: 249 additions & 43 deletions core/DCache.cpp
Original file line number Diff line number Diff line change
@@ -1,29 +1,61 @@
#include "DCache.hpp"
#include "OlympiaAllocators.hpp"

namespace olympia {
namespace olympia
{
const char DCache::name[] = "cache";

// Construct the non-blocking DCache unit.
// Wires up the LSU/L2 port handlers, the three-stage cache pipeline
// (LOOKUP -> DATA_READ -> DEALLOCATE) and the MSHR file.
// NOTE: the stale registrations of the removed handlers
// (getInstsFromLSU_/getAckFromL2Cache_/getRespFromL2Cache_) and the
// duplicated constructor head left over from the diff were dropped.
DCache::DCache(sparta::TreeNode* n, const CacheParameterSet* p) :
    sparta::Unit(n),
    l1_always_hit_(p->l1_always_hit),
    cache_line_size_(p->l1_line_size),
    num_mshr_entries_(p->mshr_entries),
    mshr_file_("mshr_file", p->mshr_entries, getClock()),
    mshr_entry_allocator_(
        sparta::notNull(OlympiaAllocators::getOlympiaAllocators(n))->mshr_entry_allocator)
{
    sparta_assert(num_mshr_entries_ > 0, "There must be atleast 1 MSHR entry");

    in_lsu_lookup_req_.registerConsumerHandler(
        CREATE_SPARTA_HANDLER_WITH_DATA(DCache, receiveMemReqFromLSU_, MemoryAccessInfoPtr));

    in_l2cache_resp_.registerConsumerHandler(
        CREATE_SPARTA_HANDLER_WITH_DATA(DCache, receiveRespFromL2Cache_, MemoryAccessInfoPtr));

    in_l2cache_ack_.registerConsumerHandler(
        CREATE_SPARTA_HANDLER_WITH_DATA(DCache, receiveAckFromL2Cache_, uint32_t));

    // Both request sources (LSU lookups and L2 refills) funnel into the same
    // arbitration event
    in_lsu_lookup_req_.registerConsumerEvent(in_l2_cache_resp_receive_event_);
    in_l2cache_resp_.registerConsumerEvent(in_l2_cache_resp_receive_event_);
    setupL1Cache_(p);

    // Pipeline config
    cache_pipeline_.enableCollection(n);
    cache_pipeline_.performOwnUpdates();
    cache_pipeline_.setContinuing(true);

    // Pipeline Handlers
    cache_pipeline_.registerHandlerAtStage(static_cast<uint32_t>(PipelineStage::LOOKUP),
                                           CREATE_SPARTA_HANDLER(DCache, handleLookup_));

    cache_pipeline_.registerHandlerAtStage(static_cast<uint32_t>(PipelineStage::DATA_READ),
                                           CREATE_SPARTA_HANDLER(DCache, handleDataRead_));

    cache_pipeline_.registerHandlerAtStage(static_cast<uint32_t>(PipelineStage::DEALLOCATE),
                                           CREATE_SPARTA_HANDLER(DCache, handleDeallocate_));

    mshr_file_.enableCollection(n);
}

// DL1 cache config
// Builds the L1 functional cache model from the parameter set and caches the
// address decoder used for block-address calculation.
// (The duplicated declaration of `repl` left by the diff interleave — a
// redefinition error — has been removed.)
void DCache::setupL1Cache_(const CacheParameterSet* p)
{
    const uint32_t l1_line_size = p->l1_line_size;
    const uint32_t l1_size_kb = p->l1_size_kb;
    const uint32_t l1_associativity = p->l1_associativity;

    // Tree-PLRU replacement across the ways of each set
    std::unique_ptr<sparta::cache::ReplacementIF> repl(
        new sparta::cache::TreePLRUReplacement(l1_associativity));
    l1_cache_.reset(new CacheFuncModel(getContainer(), l1_size_kb, l1_line_size, *repl));
    addr_decoder_ = l1_cache_->getAddrDecoder();
}

// Reload cache line
Expand All @@ -35,71 +67,233 @@ namespace olympia {
ILOG("DCache reload complete!");
}

// Access L1Cache
// Returns true on an L1 hit (or unconditionally when l1_always_hit_ is set),
// updates the MRU replacement state on a hit, and bumps the hit/miss counters.
// (Duplicated if/else header lines left by the diff interleave were removed.)
bool DCache::dataLookup_(const MemoryAccessInfoPtr & mem_access_info_ptr)
{
    const InstPtr & inst_ptr = mem_access_info_ptr->getInstPtr();
    uint64_t phyAddr = inst_ptr->getRAdr();

    bool cache_hit = false;

    if (l1_always_hit_)
    {
        cache_hit = true;
    }
    else
    {
        auto cache_line = l1_cache_->peekLine(phyAddr);
        cache_hit = (cache_line != nullptr) && cache_line->isValid();

        // Update MRU replacement state if DCache HIT
        if (cache_hit)
        {
            l1_cache_->touchMRU(*cache_line);
        }
    }

    if (l1_always_hit_)
    {
        ILOG("DL1 DCache HIT all the time: phyAddr=0x" << std::hex << phyAddr);
        dl1_cache_hits_++;
    }
    else if (cache_hit)
    {
        ILOG("DL1 DCache HIT: phyAddr=0x" << std::hex << phyAddr);
        dl1_cache_hits_++;
    }
    else
    {
        ILOG("DL1 DCache MISS: phyAddr=0x" << std::hex << phyAddr);
        dl1_cache_misses_++;
    }

    return cache_hit;
}

void DCache::getInstsFromLSU_(const MemoryAccessInfoPtr &memory_access_info_ptr){
const bool hit = dataLookup_(memory_access_info_ptr);
if(hit){
memory_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::HIT);
}else{
memory_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::MISS);
// Poll on dcache_l2cache_credits_ > 0 which means
// that L2Cache can accept requests from DCache.
// Provide a corresponsing backpressure mechanism up the pipeline.
if(!busy_) {
busy_ = true;
cache_pending_inst_ = memory_access_info_ptr;
out_l2cache_req_.send(cache_pending_inst_);

// Set the --dcache_l2cache_credits_ here.
// The lookup stage
// Classifies the access as HIT/MISS, allocates an MSHR entry on a miss (when
// space is available) and acks the LSU with the outcome. Refills pass
// through untouched.
void DCache::handleLookup_()
{
    ILOG("Lookup stage");
    const auto stage_id = static_cast<uint32_t>(PipelineStage::LOOKUP);
    const MemoryAccessInfoPtr & mem_access_info_ptr = cache_pipeline_[stage_id];
    ILOG(mem_access_info_ptr << " in Lookup stage");
    // If the mem request is a refill we dont do anything in the lookup stage
    if (mem_access_info_ptr->isRefill())
    {
        ILOG("Incoming cache refill " << mem_access_info_ptr);
        return;
    }

    const bool hit = dataLookup_(mem_access_info_ptr);
    ILOG(mem_access_info_ptr << " performing lookup " << hit);
    if (hit)
    {
        mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::HIT);
        out_lsu_lookup_ack_.send(mem_access_info_ptr);
        return;
    }

    // Check MSHR Entries for address match
    const auto & mshr_itb = mem_access_info_ptr->getMSHRInfoIterator();

    if (!mshr_itb.isValid() && mshr_file_.numFree() == 0)
    {
        // Should be Nack but miss should work for now
        mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::MISS);
        out_lsu_lookup_ack_.send(mem_access_info_ptr);
        return;
    }

    if (!mshr_itb.isValid())
    {
        // No MSHR entry yet for this request -- allocate one.
        // (The original re-checked the very same iterator in a nested `if`;
        // that redundant inner check has been removed.)
        ILOG("Creating new MSHR Entry " << mem_access_info_ptr);
        allocateMSHREntry_(mem_access_info_ptr);
    }

    // Re-fetch: allocateMSHREntry_ may have installed a fresh iterator
    const auto & mshr_it = mem_access_info_ptr->getMSHRInfoIterator();
    const uint64_t block_addr = getBlockAddr(mem_access_info_ptr);
    const bool data_arrived = (*mshr_it)->isDataArrived();
    const bool is_store_inst = mem_access_info_ptr->getInstPtr()->isStoreInst();

    // All ST are considered Hit
    if (is_store_inst)
    {
        // Update Line fill buffer only if ST
        ILOG("Write to Line fill buffer (ST), block address:0x" << std::hex << block_addr);
        (*mshr_it)->setModified(true);
        (*mshr_it)->setMemRequest(mem_access_info_ptr);
        mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::HIT);
    }
    else if (data_arrived)
    {
        ILOG("Hit on Line fill buffer (LD), block address:0x" << std::hex << block_addr);
        mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::HIT);
    }
    else
    {
        // Enqueue Load in LMQ
        ILOG("Load miss inst to LMQ; block address:0x" << std::hex << block_addr);
        (*mshr_it)->setMemRequest(mem_access_info_ptr);
        mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::MISS);
    }
    out_lsu_lookup_ack_.send(mem_access_info_ptr);
}

uint64_t DCache::getBlockAddr(const MemoryAccessInfoPtr & mem_access_info_ptr) const
{
const InstPtr & inst_ptr = mem_access_info_ptr->getInstPtr();
const auto & inst_target_addr = inst_ptr->getRAdr();
return addr_decoder_->calcBlockAddr(inst_target_addr);
}

// Data read stage
void DCache::handleDataRead_()
{
ILOG("Data Read stage");
const auto stage_id = static_cast<uint32_t>(PipelineStage::DATA_READ);
const MemoryAccessInfoPtr & mem_access_info_ptr = cache_pipeline_[stage_id];
ILOG(mem_access_info_ptr << " in read stage");
if (mem_access_info_ptr->isRefill())
{
reloadCache_(mem_access_info_ptr->getPhyAddr());
return;
}

if (mem_access_info_ptr->isCacheHit())
{
mem_access_info_ptr->setDataReady(true);
}
else
{
if (!l2cache_busy_)
{
out_l2cache_req_.send(mem_access_info_ptr);
l2cache_busy_ = true;
}
else
{
uev_mshr_request_.schedule(sparta::Clock::Cycle(1));
}
}
out_lsu_lookup_ack_.send(mem_access_info_ptr);
}

// Replay event: scan the MSHR file for the first outstanding entry whose data
// has not yet arrived and send its memory request to the L2 (one in flight).
void DCache::mshrRequest_()
{
    ILOG("Send mshr req");
    if (l2cache_busy_)
    {
        return;
    }

    for (auto it = mshr_file_.begin(); it != mshr_file_.end(); ++it)
    {
        if (!it.isValid())
        {
            continue;
        }
        const auto & entry = *it;
        auto pending_access = entry->getMemRequest();
        if (entry->isValid() && !entry->isDataArrived() && pending_access)
        {
            ILOG("Sending mshr request when not busy " << pending_access);
            out_l2cache_req_.send(pending_access);
            l2cache_busy_ = true;
            return; // only one outstanding L2 request at a time
        }
    }
}

// Deallocate stage
// For refills: wake the dependent load (if any) parked on the MSHR entry and
// retire the entry. Ordinary accesses simply drain out of the pipeline here.
void DCache::handleDeallocate_()
{
    // Fixed log typo: "Dellocate" -> "Deallocate"
    ILOG("Data Deallocate stage");
    const auto stage_id = static_cast<uint32_t>(PipelineStage::DEALLOCATE);
    const MemoryAccessInfoPtr & mem_access_info_ptr = cache_pipeline_[stage_id];
    ILOG(mem_access_info_ptr << " in deallocate stage");
    if (mem_access_info_ptr->isRefill())
    {
        const auto & mshr_it = mem_access_info_ptr->getMSHRInfoIterator();
        if (mshr_it.isValid())
        {
            // Replay the load that was waiting on this fill
            MemoryAccessInfoPtr dependant_load_inst = (*mshr_it)->getMemRequest();
            out_lsu_lookup_ack_.send(dependant_load_inst);

            ILOG("Removing mshr entry for " << mem_access_info_ptr);
            // Reuse the iterator fetched above instead of re-querying it
            mshr_file_.erase(mshr_it);
        }
        return;
    }
    ILOG("Deallocating pipeline for " << mem_access_info_ptr);
}

// Port handler: a new memory access request arrives from the LSU.
// The request is acked immediately (the lookup outcome is delivered later by
// the pipeline stages) and latched for the shared receive event, which is
// also a consumer of the L2 response port — presumably it arbitrates between
// the two sources; confirm against the event's handler.
void DCache::receiveMemReqFromLSU_(const MemoryAccessInfoPtr & memory_access_info_ptr)
{
    ILOG("Received memory access request from LSU " << memory_access_info_ptr);
    // Early ack: tells the LSU the DCache accepted the request
    out_lsu_lookup_ack_.send(memory_access_info_ptr);
    in_l2_cache_resp_receive_event_.schedule();
    // Latch the request so the scheduled event can pick it up
    lsu_mem_access_info_ = memory_access_info_ptr;
}

void DCache::getRespFromL2Cache_(const MemoryAccessInfoPtr &memory_access_info_ptr) {
out_lsu_lookup_req_.send(cache_pending_inst_);
reloadCache_(memory_access_info_ptr->getPhyAddr());
cache_pending_inst_.reset();
busy_ = false;
// Port handler: refill response arriving from the L2 cache.
// Tags the access as a refill, retires its MSHR entry (if still present),
// frees the L2 request path and kicks the shared receive event.
void DCache::receiveRespFromL2Cache_(const MemoryAccessInfoPtr & memory_access_info_ptr)
{
    ILOG("Received cache refill " << memory_access_info_ptr);
    // We mark the mem access to refill, this could be moved to the lower level caches later
    memory_access_info_ptr->setIsRefill(true);
    l2_mem_access_info_ = memory_access_info_ptr;

    const auto & mshr_iter = memory_access_info_ptr->getMSHRInfoIterator();
    if (mshr_iter.isValid())
    {
        ILOG("Removing mshr entry for " << memory_access_info_ptr);
        mshr_file_.erase(mshr_iter);
    }

    l2cache_busy_ = false;
    in_l2_cache_resp_receive_event_.schedule();
}

void DCache::getAckFromL2Cache_(const uint32_t &ack) {
void DCache::receiveAckFromL2Cache_(const uint32_t & ack)
{
// When DCache sends the request to L2Cache for a miss,
// This bool will be set to false, and Dcache should wait for ack from
// L2Cache notifying DCache that there is space in it's dcache request buffer
Expand All @@ -108,4 +302,16 @@ namespace olympia {
dcache_l2cache_credits_ = ack;
}

}
// MSHR Entry allocation in case of miss
// Creates a line-fill-buffer entry for the missing block and stores its
// iterator on the access so later stages/responses can locate the entry.
// Callers must ensure a free slot exists (handleLookup_ checks numFree()).
void DCache::allocateMSHREntry_(const MemoryAccessInfoPtr & mem_access_info_ptr)
{
    // Must be '<', not '<=': the assert runs BEFORE push_back, so with '<='
    // a full file (size == num_mshr_entries_) would pass the check and the
    // append below would overflow.
    sparta_assert(mshr_file_.size() < num_mshr_entries_, "Appending mshr causes overflows!");

    MSHREntryInfoPtr mshr_entry = sparta::allocate_sparta_shared_pointer<MSHREntryInfo>(
        mshr_entry_allocator_, cache_line_size_, getClock());

    const auto & it = mshr_file_.push_back(mshr_entry);
    mem_access_info_ptr->setMSHREntryInfoIterator(it);
}

} // namespace olympia
Loading

0 comments on commit 2f6020c

Please sign in to comment.