From 7b695f939bdd729c041ed8569d3a6c364f614ce8 Mon Sep 17 00:00:00 2001
From: wqyin
Date: Mon, 28 Feb 2022 14:32:36 -0600
Subject: [PATCH] Fix the assertion failure when enabling MLC

* mlc_fill_line was not handling insertion correctly: the writeback of
  the victim was handled after the new line had been inserted. The same
  issue does not appear in l1_fill_line
* Add an option to support a private MLC
* Turn on MLC by default
---
 src/libs/cache_lib.c        |   6 +-
 src/memory/memory.c         | 211 +++++++++++++++++++++---------------
 src/memory/memory.param.def |   9 +-
 3 files changed, 134 insertions(+), 92 deletions(-)

diff --git a/src/libs/cache_lib.c b/src/libs/cache_lib.c
index 10bca623..9a1f541b 100644
--- a/src/libs/cache_lib.c
+++ b/src/libs/cache_lib.c
@@ -471,7 +471,11 @@ void cache_invalidate(Cache* cache, Addr addr, Addr* line_addr) {
 
 /**
- * @brief Return a pointer to the lru item in the cache set
+ * @brief Return a pointer to the victim to be replaced
+ *
+ * The caller of this function is responsible for handling the possible
+ * writeback correctly; otherwise the correctness of the simulation is
+ * compromised
  *
  * @param cache
  * @param proc_id
diff --git a/src/memory/memory.c b/src/memory/memory.c
index ae7ae886..23302e18 100644
--- a/src/memory/memory.c
+++ b/src/memory/memory.c
@@ -288,9 +288,11 @@ void init_mem_req_type_priorities() {
   }
 }
 
-/**************************************************************************************/
-/* init_memory: */
+/**
+ * @brief Initialize the memory system
+ *
+ */
 void init_memory() {
   int ii;
   char name[20];
@@ -396,19 +398,37 @@ void init_memory() {
 void init_uncores(void) {
   mem->uncores = (Uncore*)malloc(sizeof(Uncore) * NUM_CORES);
 
-  /* Initialize MLC cache (shared only for now) */
-  Ported_Cache* mlc = (Ported_Cache*)malloc(sizeof(Ported_Cache));
-  init_cache(&mlc->cache, "MLC_CACHE", MLC_SIZE, MLC_ASSOC, MLC_LINE_SIZE,
-             sizeof(MLC_Data), MLC_CACHE_REPL_POLICY);
-  mlc->num_banks = MLC_BANKS;
-  mlc->ports = (Ports*)malloc(sizeof(Ports) * mlc->num_banks);
-  for(uns ii = 0; ii < mlc->num_banks; ii++) {
-    char name[MAX_STR_LENGTH + 1];
-    snprintf(name, MAX_STR_LENGTH, "MLC BANK %d PORTS", ii);
-    init_ports(&mlc->ports[ii], name, MLC_READ_PORTS, MLC_WRITE_PORTS, FALSE);
-  }
-  for(uns proc_id = 0; proc_id < NUM_CORES; proc_id++) {
-    MLC(proc_id) = mlc;
+  /* Initialize MLC cache (private per core, or shared by all cores) */
+  if(PRIVATE_MLC) {
+    for(uns proc_id = 0; proc_id < NUM_CORES; proc_id++) {
+      Ported_Cache* mlc = (Ported_Cache*)malloc(sizeof(Ported_Cache));
+      char buf[MAX_STR_LENGTH + 1];
+      snprintf(buf, MAX_STR_LENGTH, "MLC[%d]", proc_id);
+      init_cache(&mlc->cache, buf, MLC_SIZE, MLC_ASSOC, MLC_LINE_SIZE,
+                 sizeof(MLC_Data), MLC_CACHE_REPL_POLICY);
+      mlc->num_banks = MLC_BANKS;
+      mlc->ports = (Ports*)malloc(sizeof(Ports) * mlc->num_banks);
+      for(uns ii = 0; ii < mlc->num_banks; ii++) {
+        char name[MAX_STR_LENGTH + 1];
+        snprintf(name, MAX_STR_LENGTH, "MLC[%d] BANK %d PORTS", proc_id, ii);
+        init_ports(&mlc->ports[ii], name, MLC_READ_PORTS, MLC_WRITE_PORTS, FALSE);
+      }
+      MLC(proc_id) = mlc;
+    }
+  } else {
+    Ported_Cache* mlc = (Ported_Cache*)malloc(sizeof(Ported_Cache));
+    init_cache(&mlc->cache, "MLC_CACHE", MLC_SIZE, MLC_ASSOC, MLC_LINE_SIZE,
+               sizeof(MLC_Data), MLC_CACHE_REPL_POLICY);
+    mlc->num_banks = MLC_BANKS;
+    mlc->ports = (Ports*)malloc(sizeof(Ports) * mlc->num_banks);
+    for(uns ii = 0; ii < mlc->num_banks; ii++) {
+      char name[MAX_STR_LENGTH + 1];
+      snprintf(name, MAX_STR_LENGTH, "MLC BANK %d PORTS", ii);
+      init_ports(&mlc->ports[ii], name, MLC_READ_PORTS, MLC_WRITE_PORTS, FALSE);
+    }
+    for(uns proc_id = 0; proc_id < NUM_CORES; proc_id++) {
+      MLC(proc_id) = mlc;
+    }
+  }
 
   /* Initialize LLC */
@@ -434,6 +454,7 @@ void init_uncores(void) {
       snprintf(name, MAX_STR_LENGTH, "L1[%d] BANK %d PORTS", proc_id, ii);
       init_ports(&l1->ports[ii], name, L1_READ_PORTS, L1_WRITE_PORTS, FALSE);
     }
+    // Use this macro to unify the handling of private/shared L1
     L1(proc_id) = l1;
   }
 } else {
@@ -813,7 +834,7 @@ int cycle_busoutq_insert_count = 0;
 int l1_in_buf_count = 0;
 
 /**
- * @brief Not sure what this func is doing
+ * @brief Sort all mem queues
  *
  */
 void update_memory_queues() {
@@ -895,7 +916,7 @@ void update_memory() {
   if(freq_is_ready(FREQ_DOMAIN_L1)) {
     cycle_count = freq_cycle_count(FREQ_DOMAIN_L1);
-    mem_process_bus_out_reqs();
+    mem_process_bus_out_reqs();  // obsolete: returns immediately, bus_out_queue is always empty
     mem_process_l1_reqs();
     mem_process_mlc_reqs();
   }
@@ -1105,7 +1126,7 @@ Flag mem_process_l1_hit_access(Mem_Req* req, Mem_Queue_Entry* l1_queue_entry,
     DEBUG(req->proc_id, "Req index:%d no longer a chip demand\n", req->id);
   }
 
-  // this is just a stat collection
+  // write-port handling; currently only stat collection
   wp_process_l1_hit(data, req);
 
   if(L1_WRITE_THROUGH && (req->type == MRT_WB)) {
@@ -1364,7 +1385,7 @@ static Flag mem_process_l1_miss_access(Mem_Req* req,
   }
 
   /**
-   * Case 3: propogate teh miss downwards, marks the req as L1_miss
+   * Case 3: propagate the miss downwards, mark the req as L1_miss
    */
   req->l1_miss = TRUE;
   req->l1_miss_cycle = cycle_count;
@@ -1497,13 +1518,14 @@ static Flag mem_process_mlc_miss_access(Mem_Req* req,
 }
 
 /**
- * @brief
- * Returns TRUE if l1 access is complete and needs to be removed from l1_queue
+ * @brief Process an L1 req that has already obtained a port
+ *
+ * On an L1 hit, send the req back upwards; otherwise try to send it out to the bus (ramulator)
  * @param req
  * @param l1_queue_entry
 * @param out_queue_insertion_count
 * @param reserved_entry_count
- * @return Flag
+ * @return Flag TRUE if the L1 access is complete and needs to be removed from l1_queue
 */
 static Flag mem_complete_l1_access(Mem_Req* req,
                                    Mem_Queue_Entry* l1_queue_entry,
@@ -1697,11 +1719,10 @@ static Flag mem_complete_l1_access(Mem_Req* req,
       req->state = MRS_MEM_NEW;
       l1_miss_access = ramulator_send(req);
       if(!l1_miss_access) {
-        // STAT_EVENT(req->proc_id, REJECTED_QUEUE_BUS_OUT);
-
+        // failed to send the req to DRAM
         req->state = MRS_L1_WAIT;
         access_done = FALSE;
-      } else {
+      } else {  // sending to DRAM succeeded
         ASSERT(req->proc_id, req->mem_queue_cycle >= req->rdy_cycle);
 
         req->queue = NULL;
@@ -1789,11 +1810,16 @@ static Flag mem_complete_l1_access(Mem_Req* req,
   return access_done;
 }
 
-/**************************************************************************************/
-/* mem_complete_mlc_access: */
-/* Returns TRUE if mlc access is complete and needs to be removed from mlc_queue
- */
+/**
+ * @brief Complete an MLC access
+ *
+ * @param req
+ * @param mlc_queue_entry
+ * @param l1_queue_insertion_count
+ * @param reserved_entry_count
+ * @return Flag Returns TRUE if the MLC access is complete and needs to be removed from mlc_queue
+ */
 static Flag mem_complete_mlc_access(Mem_Req* req,
                                     Mem_Queue_Entry* mlc_queue_entry,
                                     int* l1_queue_insertion_count,
@@ -1920,9 +1946,8 @@ static void mem_process_l1_reqs() {
 
     /* Request is ready: see what state it is in */
 
-    /* If this is a new request, try reserve L1 port and transition to wait state */
     if(req->state == MRS_L1_NEW) {
-      mem_start_l1_access(req);
+      mem_start_l1_access(req);  // obtain a port for the req
       STAT_EVENT(req->proc_id, L1_ACCESS);
       if(req->type == MRT_DPRF || req->type == MRT_IPRF)
         STAT_EVENT(req->proc_id, L1_PREF_ACCESS);
@@ -1975,11 +2000,12 @@ static void
mem_process_l1_reqs() {
   }
 }
 
-/**************************************************************************************/
-/* mem_process_mlc_reqs: */
-/* Access MLC if port is ready - If MLC miss, then put the request into miss
- * queue */
+/**
+ * @brief Access MLC if a port is ready; on an MLC miss, put the
+ * request into the miss queue
+ *
+ */
 static void mem_process_mlc_reqs() {
   Mem_Req* req = NULL;
   int ii;
@@ -2065,6 +2091,12 @@ static void mem_process_mlc_reqs() {
 /* mem_process_bus_out_reqs: */
 /* FIXME: need to busy the bus for the time a line is being sent */
 
+/**
+ * @brief Obsolete: bus_out has been replaced by ramulator. The function
+ * is still called, but since bus_out_queue is supposed to always be
+ * empty, the first return takes execution out of the function
+ *
+ */
 static void mem_process_bus_out_reqs() {
   Mem_Req* req;
   int ii;
@@ -2084,6 +2116,7 @@ static void mem_process_bus_out_reqs() {
     // return; // VEYNU: if there is no room in the mem queue do nothing
     return;  // Ramulator: early return if bus_out_queue is empty
   }
+  // WQ: will this ever be executed?
   ASSERTM(0, FALSE, "ERROR: bus_out_queue should always be empty\n");
   // Ramulator
   // Ramulator handles off-chip communication latency itself. So we
@@ -4725,60 +4758,6 @@ Flag mlc_fill_line(Mem_Req* req) {
   /* if (!get_write_port(&MLC(req->proc_id)->ports[req->mlc_bank])) return
    * FAILURE; */
 
-  // Put prefetches in the right position for replacement
-  // cmp FIXME prefetchers
-  if(req->type == MRT_DPRF || req->type == MRT_IPRF) {
-    mem->pref_replpos = INSERT_REPL_DEFAULT;
-    if(PREF_INSERT_LRU) {
-      mem->pref_replpos = INSERT_REPL_LRU;
-      STAT_EVENT(req->proc_id, PREF_REPL_LRU);
-    } else if(PREF_INSERT_MIDDLE) {
-      mem->pref_replpos = INSERT_REPL_MID;
-      STAT_EVENT(req->proc_id, PREF_REPL_MID);
-    } else if(PREF_INSERT_LOWQTR) {
-      mem->pref_replpos = INSERT_REPL_LOWQTR;
-      STAT_EVENT(req->proc_id, PREF_REPL_LOWQTR);
-    }
-    data = (MLC_Data*)cache_insert_replpos(
-      &MLC(req->proc_id)->cache, req->proc_id, req->addr, &line_addr,
-      &repl_line_addr, mem->pref_replpos, TRUE);
-  } else {
-    data = (MLC_Data*)cache_insert(&MLC(req->proc_id)->cache, req->proc_id,
-                                   req->addr, &line_addr, &repl_line_addr);
-  }
-
-  if(req->type == MRT_WB_NODIRTY || req->type == MRT_WB) {
-    STAT_EVENT(req->proc_id, MLC_WB_FILL);
-    STAT_EVENT(req->proc_id, CORE_MLC_WB_FILL);
-  } else {
-    STAT_EVENT(req->proc_id, MLC_FILL);
-    STAT_EVENT(req->proc_id, CORE_MLC_FILL);
-    INC_STAT_EVENT_ALL(TOTAL_MEM_LATENCY, cycle_count - req->mlc_miss_cycle);
-    INC_STAT_EVENT(req->proc_id, CORE_MEM_LATENCY,
-                   cycle_count - req->mlc_miss_cycle);
-
-    if(req->type != MRT_DPRF && req->type != MRT_IPRF &&
-       !req->demand_match_prefetch) {
-      STAT_EVENT(req->proc_id, MLC_DEMAND_FILL);
-      STAT_EVENT(req->proc_id, CORE_MLC_DEMAND_FILL);
-      INC_STAT_EVENT_ALL(TOTAL_MEM_LATENCY_DEMAND,
-                         cycle_count - req->mlc_miss_cycle);
-      INC_STAT_EVENT(req->proc_id, CORE_MEM_LATENCY_DEMAND,
-                     cycle_count - req->mlc_miss_cycle);
-    } else {
-      STAT_EVENT(req->proc_id, MLC_PREF_FILL);
-      STAT_EVENT(req->proc_id, CORE_MLC_PREF_FILL);
-      INC_STAT_EVENT_ALL(TOTAL_MEM_LATENCY_PREF,
-                         cycle_count - req->mlc_miss_cycle);
-      INC_STAT_EVENT(req->proc_id, CORE_MEM_LATENCY_PREF,
-                     cycle_count - req->mlc_miss_cycle);
-      if(req->demand_match_prefetch) {
-        STAT_EVENT(req->proc_id, CORE_MLC_PREF_FILL_PARTIAL_USED);
-        STAT_EVENT(req->proc_id, CORE_PREF_MLC_PARTIAL_USED);
-        STAT_EVENT_ALL(PREF_MLC_TOTAL_PARTIAL_USED);
-      }
-    }
-  }
 
   /* Do not insert the line yet, just check which line we need to replace.
     If that line is dirty, it's possible that we need to write it back
     before we grab the space. */
@@ -4884,6 +4863,60 @@ Flag mlc_fill_line(Mem_Req* req) {
     }
   }
 
+  if(req->type == MRT_DPRF || req->type == MRT_IPRF) {
+    mem->pref_replpos = INSERT_REPL_DEFAULT;
+    if(PREF_INSERT_LRU) {
+      mem->pref_replpos = INSERT_REPL_LRU;
+      STAT_EVENT(req->proc_id, PREF_REPL_LRU);
+    } else if(PREF_INSERT_MIDDLE) {
+      mem->pref_replpos = INSERT_REPL_MID;
+      STAT_EVENT(req->proc_id, PREF_REPL_MID);
+    } else if(PREF_INSERT_LOWQTR) {
+      mem->pref_replpos = INSERT_REPL_LOWQTR;
+      STAT_EVENT(req->proc_id, PREF_REPL_LOWQTR);
+    }
+    data = (MLC_Data*)cache_insert_replpos(
+      &MLC(req->proc_id)->cache, req->proc_id, req->addr, &line_addr,
+      &repl_line_addr, mem->pref_replpos, TRUE);
+  } else {
+    data = (MLC_Data*)cache_insert(&MLC(req->proc_id)->cache, req->proc_id,
+                                   req->addr, &line_addr, &repl_line_addr);
+  }
+
+  if(req->type == MRT_WB_NODIRTY || req->type == MRT_WB) {
+    STAT_EVENT(req->proc_id, MLC_WB_FILL);
+    STAT_EVENT(req->proc_id, CORE_MLC_WB_FILL);
+  } else {
+    STAT_EVENT(req->proc_id, MLC_FILL);
+    STAT_EVENT(req->proc_id, CORE_MLC_FILL);
+    INC_STAT_EVENT_ALL(TOTAL_MEM_LATENCY, cycle_count - req->mlc_miss_cycle);
+    INC_STAT_EVENT(req->proc_id, CORE_MEM_LATENCY,
+                   cycle_count - req->mlc_miss_cycle);
+
+    if(req->type != MRT_DPRF && req->type != MRT_IPRF &&
+       !req->demand_match_prefetch) {
+      STAT_EVENT(req->proc_id, MLC_DEMAND_FILL);
+      STAT_EVENT(req->proc_id, CORE_MLC_DEMAND_FILL);
+      INC_STAT_EVENT_ALL(TOTAL_MEM_LATENCY_DEMAND,
+                         cycle_count - req->mlc_miss_cycle);
+      INC_STAT_EVENT(req->proc_id, CORE_MEM_LATENCY_DEMAND,
+                     cycle_count - req->mlc_miss_cycle);
+    } else {
+      STAT_EVENT(req->proc_id, MLC_PREF_FILL);
+      STAT_EVENT(req->proc_id, CORE_MLC_PREF_FILL);
+      INC_STAT_EVENT_ALL(TOTAL_MEM_LATENCY_PREF,
+                         cycle_count - req->mlc_miss_cycle);
+      INC_STAT_EVENT(req->proc_id, CORE_MEM_LATENCY_PREF,
+                     cycle_count - req->mlc_miss_cycle);
+      if(req->demand_match_prefetch) {
+        STAT_EVENT(req->proc_id, CORE_MLC_PREF_FILL_PARTIAL_USED);
+        STAT_EVENT(req->proc_id, CORE_PREF_MLC_PARTIAL_USED);
+        STAT_EVENT_ALL(PREF_MLC_TOTAL_PARTIAL_USED);
+      }
+    }
+  }
+
+  /* bring the line into the MLC and then modify it */
   data->proc_id = req->proc_id;
   data->dirty = ((req->type == MRT_WB) &&
diff --git a/src/memory/memory.param.def b/src/memory/memory.param.def
index b29aafab..13cff708 100644
--- a/src/memory/memory.param.def
+++ b/src/memory/memory.param.def
@@ -51,13 +51,18 @@
  */
 DEF_PARAM(enable_swprf, ENABLE_SWPRF, Flag, Flag, FALSE, )
 
-/* MLC */
-DEF_PARAM(mlc_present, MLC_PRESENT, Flag, Flag, FALSE, )
+/**
+ * MLC
+ * If the MLC is configured as private, the parameters (size, banks,
+ * etc.) refer to a single per-core MLC; if shared, they refer to the
+ * aggregate capacity
+ */
+DEF_PARAM(mlc_present, MLC_PRESENT, Flag, Flag, TRUE, )
 DEF_PARAM(mlc_size, MLC_SIZE, uns, uns, (512 * 1024), )
 DEF_PARAM(mlc_assoc, MLC_ASSOC, uns, uns, 4, )
 DEF_PARAM(mlc_line_size, MLC_LINE_SIZE, uns, uns, 64, )
 DEF_PARAM(mlc_cycles, MLC_CYCLES, uns, uns, 12, )
 DEF_PARAM(perfect_mlc, PERFECT_MLC, Flag, Flag, FALSE, )
+DEF_PARAM(private_mlc, PRIVATE_MLC, Flag, Flag, TRUE, )
 DEF_PARAM(mlc_read_ports, MLC_READ_PORTS, uns, uns, 1, )
 DEF_PARAM(mlc_write_ports, MLC_WRITE_PORTS, uns, uns, 1, )
 DEF_PARAM(mlc_banks, MLC_BANKS, uns, uns, 8, )
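
Note on the reordering in mlc_fill_line: the two hunks above move the cache_insert/stat block so that it runs only after the victim lookup and writeback handling, matching what l1_fill_line already does. Below is a minimal, self-contained C sketch of that ordering constraint. It is not scarab code; every name in it (Line, peek_victim, send_writeback, insert_line, fill_line) is hypothetical.

/* fill_order_sketch.c -- compile with: cc fill_order_sketch.c */
#include <stdio.h>
#include <stdbool.h>

/* Hypothetical one-entry cache set; stands in for a set in Ported_Cache. */
typedef struct {
  bool valid;
  bool dirty;
  long tag;
} Line;

/* Stand-in for cache_lib's victim lookup (e.g. get_next_repl_line):
   returns the victim WITHOUT evicting it. */
static Line* peek_victim(Line* set) {
  return set; /* one-way set: the only line is always the victim */
}

/* Stand-in for queueing an MRT_WB request for the evicted line. */
static void send_writeback(const Line* victim) {
  printf("writeback of dirty victim, tag=0x%lx\n", victim->tag);
}

/* Stand-in for cache_insert: installs the new line, which OVERWRITES
   the victim's tag and dirty bit. */
static void insert_line(Line* set, long tag) {
  set->valid = true;
  set->dirty = false;
  set->tag   = tag;
}

/* The order the patch establishes in mlc_fill_line:
   1. look up the victim, 2. issue its writeback while its state is
   still intact, 3. only then insert the new line. */
static void fill_line(Line* set, long new_tag) {
  Line* victim = peek_victim(set);
  if(victim->valid && victim->dirty)
    send_writeback(victim);
  insert_line(set, new_tag);
}

int main(void) {
  Line set = {.valid = true, .dirty = true, .tag = 0x40};
  fill_line(&set, 0x80); /* prints a writeback for tag 0x40 */
  return 0;
}

With the pre-patch order (insert first, write back afterwards), step 2 would observe the freshly inserted line instead of the victim, so the writeback would carry the wrong line state; that is consistent with the assertion failure described in the subject line.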
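Note on configuration: the memory.param.def hunk flips both MLC_PRESENT and the new PRIVATE_MLC to TRUE, so setups that relied on a shared (or absent) MLC must now opt out explicitly. A hypothetical parameter-file excerpt, assuming scarab's usual "--<param> <value>" convention; the values and comments are illustrative only:

--mlc_present 1       # MLC is now on by default
--private_mlc 0       # opt out of the new default: one MLC shared by all cores
--mlc_size 524288     # aggregate capacity when shared, per-core when private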