diff --git a/example/llvm7-CPU2006-cfg/sched.ini b/example/llvm7-CPU2006-cfg/sched.ini index 7036b76f..bf1c4205 100644 --- a/example/llvm7-CPU2006-cfg/sched.ini +++ b/example/llvm7-CPU2006-cfg/sched.ini @@ -147,6 +147,7 @@ LATENCY_PRECISION LLVM # The scheduler used to find an initial feasible schedule. # LIST: List scheduler # SEQ: Sequential list scheduler +# STALLING_LIST: Schedules stalls until instruction with top heuristic score becomes ready HEUR_SCHED_TYPE LIST # What circumstances the ACO dual cost algorithm should be applied @@ -161,7 +162,7 @@ ACO_DUAL_COST_FN_ENABLE OFF # The option NONE disables the constraint cost function for the selected pass # NOTE: If the constraint cost function is SLIL then the cost function must also be SLIL # NOTE: The value for ACO2P is used in the second pass -ACO_DUAL_COST_FN TARGET +ACO_DUAL_COST_FN SLIL ACO2P_DUAL_COST_FN NONE #use 3-tournament @@ -199,8 +200,10 @@ ACO_DBG_REGIONS NONE ACO_DBG_REGIONS_OUT_PATH /home/user/path_to_graph_output_directory/ -# The importance of the heuristic in ACO. ACO uses (1/heuristic)^importance, so -# importance of 0 means don't use the heuristic. +# Previously the heuristic was raised to the power of the heuristic importance, +# but this has proved to not be useful, and added an expensive pow operation. +# Now a heuristic importance of 0 disables the heuristic. Any other value leaves the +# heuristic enabled. ACO_HEURISTIC_IMPORTANCE 1 ACO2P_HEURISTIC_IMPORTANCE 1 diff --git a/example/optsched-cfg/sched.ini b/example/optsched-cfg/sched.ini index 7d387d15..f74a6feb 100644 --- a/example/optsched-cfg/sched.ini +++ b/example/optsched-cfg/sched.ini @@ -147,6 +147,7 @@ LATENCY_PRECISION LLVM # The scheduler used to find an initial feasible schedule. 
# LIST: List scheduler # SEQ: Sequential list scheduler +# STALLING_LIST: Schedules stalls until instruction with top heuristic score becomes ready HEUR_SCHED_TYPE LIST # What circumstances the ACO dual cost algorithm should be applied @@ -161,7 +162,7 @@ ACO_DUAL_COST_FN_ENABLE OFF # The option NONE disables the constraint cost function for the selected pass # NOTE: If the constraint cost function is SLIL then the cost function must also be SLIL # NOTE: The value for ACO2P is used in the second pass -ACO_DUAL_COST_FN TARGET +ACO_DUAL_COST_FN SLIL ACO2P_DUAL_COST_FN NONE #use 3-tournament @@ -199,8 +200,10 @@ ACO_DBG_REGIONS NONE ACO_DBG_REGIONS_OUT_PATH /home/user/path_to_graph_output_directory/ -# The importance of the heuristic in ACO. ACO uses (1/heuristic)^importance, so -# importance of 0 means don't use the heuristic. +# Previously the heuristic was raised to the power of the heuristic importance, +# but this has proved to not be useful, and added an expensive pow operation. +# Now a heuristic importance of 0 disables the heuristic. Any other value leaves the +# heuristic enabled. ACO_HEURISTIC_IMPORTANCE 1 ACO2P_HEURISTIC_IMPORTANCE 1 diff --git a/include/opt-sched/Scheduler/aco.h b/include/opt-sched/Scheduler/aco.h index b4aec360..9c1c775e 100644 --- a/include/opt-sched/Scheduler/aco.h +++ b/include/opt-sched/Scheduler/aco.h @@ -17,7 +17,6 @@ Last Update: Jan. 
2020 #include #include #include - namespace llvm { namespace opt_sched { @@ -32,7 +31,7 @@ enum class DCF_OPT { struct Choice { SchedInstruction *inst; - double heuristic; // range 1 to 2 + pheromone_t heuristic; // range 1 to 2 InstCount readyOn; // number of cycles until this instruction becomes ready }; @@ -51,7 +50,7 @@ class ACOScheduler : public ConstrainedScheduler { private: pheromone_t &Pheromone(SchedInstruction *from, SchedInstruction *to); pheromone_t &Pheromone(InstCount from, InstCount to); - double Score(SchedInstruction *from, Choice choice); + pheromone_t Score(SchedInstruction *from, Choice choice); bool shouldReplaceSchedule(InstSchedule *OldSched, InstSchedule *NewSched, bool IsGlobal); DCF_OPT ParseDCFOpt(const std::string &opt); @@ -78,7 +77,7 @@ class ACOScheduler : public ConstrainedScheduler { Choice SelectInstruction(const llvm::ArrayRef &ready, SchedInstruction *lastInst); void UpdatePheromone(InstSchedule *schedule); - std::unique_ptr FindOneSchedule(); + std::unique_ptr FindOneSchedule(InstCount TargetRPCost); llvm::SmallVector pheromone_; pheromone_t initialValue_; bool use_fixed_bias; @@ -90,6 +89,8 @@ class ACOScheduler : public ConstrainedScheduler { double local_decay; double decay_factor; int ants_per_iteration; + int ants_per_iteration1p; + int ants_per_iteration2p; int noImprovementMax; bool print_aco_trace; std::unique_ptr InitialSchedule; @@ -99,6 +100,7 @@ class ACOScheduler : public ConstrainedScheduler { pheromone_t ScRelMax; DCF_OPT DCFOption; SPILL_COST_FUNCTION DCFCostFn; + int localCmp = 0, localCmpRej = 0, globalCmp = 0, globalCmpRej = 0; }; } // namespace opt_sched diff --git a/include/opt-sched/Scheduler/bb_spill.h b/include/opt-sched/Scheduler/bb_spill.h index 53f4f49e..11aa31f9 100644 --- a/include/opt-sched/Scheduler/bb_spill.h +++ b/include/opt-sched/Scheduler/bb_spill.h @@ -80,6 +80,7 @@ class BBWithSpill : public SchedRegion { InstCount totSpillCost_; InstCount slilSpillCost_; bool trackLiveRangeLngths_; + bool 
NeedsComputeSLIL; // Virtual Functions: // Given a schedule, compute the cost function value @@ -127,6 +128,8 @@ class BBWithSpill : public SchedRegion { // cost of the schedule using Scf whenever the spill cost updates void addRecordedCost(SPILL_COST_FUNCTION Scf); void storeExtraCost(InstSchedule *sched, SPILL_COST_FUNCTION Scf); + InstCount getUnnormalizedIncrementalRPCost() const; + void CmputAndSetCostLwrBound(); int cmputSpillCostLwrBound(); @@ -149,6 +152,7 @@ class BBWithSpill : public SchedRegion { InstCount slotNum, EnumTreeNode *trgtNode); void SetSttcLwrBounds(EnumTreeNode *node); bool ChkInstLglty(SchedInstruction *inst); + bool needsSLIL() const; void InitForSchdulng(); protected: diff --git a/include/opt-sched/Scheduler/data_dep.h b/include/opt-sched/Scheduler/data_dep.h index 36020266..da2e6eab 100644 --- a/include/opt-sched/Scheduler/data_dep.h +++ b/include/opt-sched/Scheduler/data_dep.h @@ -633,7 +633,7 @@ class InstSchedule { InstCount NormSpillCost; // Stores the spill cost of other spill cost functions - InstCount storedSC[MAX_SCHED_PRIRTS]; + InstCount storedSC[MAX_SCF_TYPES]; // An array of peak reg pressures for all reg types in the schedule InstCount *peakRegPressures_; diff --git a/include/opt-sched/Scheduler/gen_sched.h b/include/opt-sched/Scheduler/gen_sched.h index af3ba4ec..a7221822 100644 --- a/include/opt-sched/Scheduler/gen_sched.h +++ b/include/opt-sched/Scheduler/gen_sched.h @@ -33,7 +33,9 @@ enum SchedulerType { // List scheduler. SCHED_LIST, // Sequential list scheduler. - SCHED_SEQ + SCHED_SEQ, + // Stalling list scheduler + SCHED_STALLING_LIST, }; // Forward declarations used to reduce the number of #includes. 
diff --git a/include/opt-sched/Scheduler/list_sched.h b/include/opt-sched/Scheduler/list_sched.h index 16b6aa2e..03ff4dcc 100644 --- a/include/opt-sched/Scheduler/list_sched.h +++ b/include/opt-sched/Scheduler/list_sched.h @@ -52,6 +52,20 @@ class SequentialListScheduler : public ListScheduler { bool ChkInstLglty_(SchedInstruction *inst) const override; }; +// A list scheduler that schedules the instruction with the top heuristic value. +// Unlike ListScheduler, this class considers instructions that are ready +// in terms of data dependencies, but not in terms of latencies. +// If the instruction with the top heuristic is not ready in terms of latency, +// then stalls will be inserted until it is ready. +class StallSchedulingListScheduler : public ListScheduler { +public: + StallSchedulingListScheduler(DataDepGraph *dataDepGraph, + MachineModel *machMdl, InstCount schedUprBound, + SchedPriorities prirts); + + SchedInstruction *PickInst() const; +}; + } // namespace opt_sched } // namespace llvm diff --git a/include/opt-sched/Scheduler/sched_basic_data.h b/include/opt-sched/Scheduler/sched_basic_data.h index 85bcec19..5602fdc3 100644 --- a/include/opt-sched/Scheduler/sched_basic_data.h +++ b/include/opt-sched/Scheduler/sched_basic_data.h @@ -83,6 +83,8 @@ enum SPILL_COST_FUNCTION { SCF_TARGET }; +#define MAX_SCF_TYPES 10 + // The type of instruction signatures, used by the enumerator's history table to // keep track of partial schedules. typedef UDT_HASHKEY InstSignature; diff --git a/include/opt-sched/Scheduler/sched_region.h b/include/opt-sched/Scheduler/sched_region.h index f4097d16..88115aaa 100644 --- a/include/opt-sched/Scheduler/sched_region.h +++ b/include/opt-sched/Scheduler/sched_region.h @@ -73,6 +73,8 @@ class SchedRegion { // Get the number of simulated spills code added for this block. 
inline int GetSimSpills() { return totalSimSpills_; } + // Gets the un-normalized incremental RP cost for the region(used by ACO) + virtual InstCount getUnnormalizedIncrementalRPCost() const = 0; // Get schedLength for best-so-far sched inline InstCount getBestSchedLength() { return bestSchedLngth_; } @@ -312,6 +314,8 @@ class SchedRegion { virtual bool EnableEnum_() = 0; + virtual bool needsSLIL() const = 0; + // Prepares the region for being scheduled. virtual void SetupForSchdulng_() = 0; diff --git a/lib/Scheduler/aco.cpp b/lib/Scheduler/aco.cpp index 74b277e2..9d2b8d71 100644 --- a/lib/Scheduler/aco.cpp +++ b/lib/Scheduler/aco.cpp @@ -8,6 +8,7 @@ #include "llvm/ADT/STLExtras.h" #include #include +#include #include using namespace llvm::opt_sched; @@ -22,6 +23,14 @@ double RandDouble(double min, double max) { return (rand * (max - min)) + min; } +#define DBG_SRS 0 + +#if DBG_SRS +#define SRS_DBG_LOG(...) Logger::Info(__VA_ARGS__) +#else +#define SRS_DBG_LOG(...) static_cast(0) +#endif + #define USE_ACS 0 #define TWO_STEP 1 #define MIN_DEPOSITION 1 @@ -57,7 +66,10 @@ ACOScheduler::ACOScheduler(DataDepGraph *dataDepGraph, bias_ratio = schedIni.GetFloat("ACO_BIAS_RATIO"); local_decay = schedIni.GetFloat("ACO_LOCAL_DECAY"); decay_factor = schedIni.GetFloat("ACO_DECAY_FACTOR"); - ants_per_iteration = schedIni.GetInt("ACO_ANT_PER_ITERATION"); + ants_per_iteration1p = schedIni.GetInt("ACO_ANT_PER_ITERATION"); + ants_per_iteration2p = + schedIni.GetInt("ACO2P_ANT_PER_ITERATION", ants_per_iteration1p); + ants_per_iteration = ants_per_iteration1p; print_aco_trace = schedIni.GetBool("ACO_TRACE"); IsTwoPassEn = schedIni.GetBool("USE_TWO_PASS"); DCFOption = ParseDCFOpt(schedIni.GetString("ACO_DUAL_COST_FN_ENABLE", "OFF")); @@ -113,49 +125,79 @@ pheromone_t &ACOScheduler::Pheromone(InstCount from, InstCount to) { } double ACOScheduler::Score(SchedInstruction *from, Choice choice) { - return Pheromone(from, choice.inst) * - pow(choice.heuristic, heuristicImportance_); + // 
tuneable heuristic importance is temporarily disabled + // return Pheromone(from, choice.inst) * + // pow(choice.heuristic, heuristicImportance_); + double hf = heuristicImportance_ ? choice.heuristic : 1.0; + return Pheromone(from, choice.inst) * hf; } bool ACOScheduler::shouldReplaceSchedule(InstSchedule *OldSched, InstSchedule *NewSched, bool IsGlobal) { +#if DBG_SRS + std::string CmpLn = "SRS/"; + CmpLn += IsGlobal ? "g/" : ""; +#endif // DBG_SRS + + const auto SchedCost = [this](InstSchedule *Sched) { + return !IsTwoPassEn ? Sched->GetCost() : Sched->GetNormSpillCost(); + }; + // return true if the old schedule is null (eg:there is no old schedule) // return false if the new schedule is is NULL - if (!OldSched) + if (!OldSched) { + SRS_DBG_LOG("SRS/Old:null, New:%d", !NewSched ? -1 : SchedCost(NewSched)); return true; - else if (!NewSched) + } else if (!NewSched) { + // not likely to happen + SRS_DBG_LOG("SRS/Old:%d, New:null", SchedCost(OldSched)); return false; - - // if we are using the dual cost function algorithm use the DCF code - // DCF has 4 different behaviors: - // OFF - Does Nothing - // GLOBAL_ONLY - Only applies to comparisons of the globally best schedule - // GLOBAL_AND_TIGHTEN - If a schedule has a lower DCFCost it wins - // GLOBAL_AND_ITERATION - Rejects any schedule with a worse DCFCost - if ((DCFOption == DCF_OPT::GLOBAL_ONLY && IsGlobal) || - DCFOption == DCF_OPT::GLOBAL_AND_TIGHTEN || - DCFOption == DCF_OPT::GLOBAL_AND_ITERATION) { - InstCount NewDCFCost = NewSched->GetExtraSpillCost(DCFCostFn); - InstCount OldDCFCost = OldSched->GetExtraSpillCost(DCFCostFn); - if (NewDCFCost < OldDCFCost) - return true; - else if ((DCFOption == DCF_OPT::GLOBAL_ONLY && IsGlobal) || - DCFOption == DCF_OPT::GLOBAL_AND_ITERATION) { - if (NewDCFCost > OldDCFCost) - return false; - } } // if it is the 1st pass return the cost comparison // if it is the 2nd pass return true if the RP cost and ILP cost is less - if (!IsTwoPassEn) - return NewSched->GetCost() < 
OldSched->GetCost(); - else if (!rgn_->IsSecondPass()) - return NewSched->GetNormSpillCost() < OldSched->GetNormSpillCost(); - else - return (NewSched->GetNormSpillCost() <= OldSched->GetNormSpillCost()) && - (NewSched->GetExecCost() < OldSched->GetExecCost()); + if (!IsTwoPassEn || !rgn_->IsSecondPass()) { + InstCount NewCost = SchedCost(NewSched); + InstCount OldCost = SchedCost(OldSched); + +#if DBG_SRS + CmpLn += + "Old:" + std::to_string(OldCost) + ", New:" + std::to_string(NewCost); +#endif // DBG_SRS + + if (NewCost < OldCost) { + SRS_DBG_LOG(CmpLn.c_str()); + return true; + } else if (NewCost == OldCost && + ((DCFOption == DCF_OPT::GLOBAL_ONLY && IsGlobal) || + DCFOption == DCF_OPT::GLOBAL_AND_TIGHTEN || + DCFOption == DCF_OPT::GLOBAL_AND_ITERATION)) { + InstCount NewDCFCost = NewSched->GetExtraSpillCost(DCFCostFn); + InstCount OldDCFCost = OldSched->GetExtraSpillCost(DCFCostFn); + +#if DBG_SRS + CmpLn += ", OldDCF:" + std::to_string(OldDCFCost) + + ", NewDCF:" + std::to_string(NewDCFCost); +#endif // DBG_SRS + SRS_DBG_LOG(CmpLn.c_str()); + return (NewDCFCost < OldDCFCost); + + } else { + SRS_DBG_LOG(CmpLn.c_str()); + return false; + } + } else { + InstCount NewCost = NewSched->GetExecCost(); + InstCount OldCost = OldSched->GetExecCost(); + InstCount NewSpillCost = NewSched->GetNormSpillCost(); + InstCount OldSpillCost = OldSched->GetNormSpillCost(); + SRS_DBG_LOG("SRS2P/%sOld:%d,New:%d,OldNSC:%d,NewNSC:%d", + IsGlobal ? 
"g/" : "", OldCost, NewCost, OldSpillCost, + NewSpillCost); + return (NewCost < OldCost && NewSpillCost <= OldSpillCost) || + NewSpillCost < OldSpillCost; + } } DCF_OPT ACOScheduler::ParseDCFOpt(const std::string &opt) { @@ -237,7 +279,8 @@ Choice ACOScheduler::SelectInstruction(const llvm::ArrayRef &ready, return ready.back(); } -std::unique_ptr ACOScheduler::FindOneSchedule() { +std::unique_ptr +ACOScheduler::FindOneSchedule(InstCount TargetRPCost) { SchedInstruction *lastInst = NULL; std::unique_ptr schedule = llvm::make_unique(machMdl_, dataDepGraph_, true); @@ -362,6 +405,12 @@ std::unique_ptr ACOScheduler::FindOneSchedule() { if (blah == inst) rdyLst_->RemoveNextPriorityInst(); UpdtSlotAvlblty_(inst); + + if (rgn_->getUnnormalizedIncrementalRPCost() > TargetRPCost) { + delete rdyLst_; + rdyLst_ = new ReadyList(dataDepGraph_, prirts_); + return nullptr; + } } /* Logger::Info("Chose instruction %d (for some reason)", instNum); */ schedule->AppendInst(instNum); @@ -384,8 +433,10 @@ FUNC_RESULT ACOScheduler::FindSchedule(InstSchedule *schedule_out, heuristicImportance_ = schedIni.GetInt( IsFirst ? "ACO_HEURISTIC_IMPORTANCE" : "ACO2P_HEURISTIC_IMPORTANCE"); fixed_bias = schedIni.GetInt(IsFirst ? "ACO_FIXED_BIAS" : "ACO2P_FIXED_BIAS"); + ants_per_iteration = IsFirst ? ants_per_iteration1p : ants_per_iteration2p; noImprovementMax = schedIni.GetInt(IsFirst ? "ACO_STOP_ITERATIONS" : "ACO2P_STOP_ITERATIONS"); + Logger::Info("ants/it:%d,stop_iter:%d", ants_per_iteration, noImprovementMax); if (DCFOption != DCF_OPT::OFF) { std::string DcfFnString = schedIni.GetString(IsFirst ? 
"ACO_DUAL_COST_FN" : "ACO2P_DUAL_COST_FN"); @@ -404,11 +455,15 @@ FUNC_RESULT ACOScheduler::FindSchedule(InstSchedule *schedule_out, for (int i = 0; i < pheromone_size; i++) pheromone_[i] = 1; initialValue_ = 1; - std::unique_ptr heuristicSched = FindOneSchedule(); + const InstCount MaxRPTarget = std::numeric_limits::max(); + std::unique_ptr heuristicSched = FindOneSchedule(MaxRPTarget); InstCount heuristicCost = heuristicSched->GetCost() + 1; // prevent divide by zero InstCount InitialCost = InitialSchedule ? InitialSchedule->GetCost() : 0; + InstCount TargetNSC = InitialSchedule ? InitialSchedule->GetNormSpillCost() + : heuristicSched->GetNormSpillCost(); + #if USE_ACS initialValue_ = 2.0 / ((double)count_ * heuristicCost); #else @@ -430,9 +485,15 @@ FUNC_RESULT ACOScheduler::FindSchedule(InstSchedule *schedule_out, std::unique_ptr iterationBest; for (int i = 0; i < ants_per_iteration; i++) { CrntAntEdges.clear(); - std::unique_ptr schedule = FindOneSchedule(); + std::unique_ptr schedule = FindOneSchedule( + i && rgn_->GetSpillCostFunc() != SCF_SLIL ? 
TargetNSC : MaxRPTarget); if (print_aco_trace) PrintSchedule(schedule.get()); + ++localCmp; + if (iterationBest && bestSchedule && + !(!IsFirst && iterationBest->GetNormSpillCost() <= + bestSchedule->GetNormSpillCost())) + ++localCmpRej; if (shouldReplaceSchedule(iterationBest.get(), schedule.get(), /*IsGlobal=*/false)) { iterationBest = std::move(schedule); @@ -440,7 +501,11 @@ FUNC_RESULT ACOScheduler::FindSchedule(InstSchedule *schedule_out, IterAntEdges = CrntAntEdges; } } - UpdatePheromone(iterationBest.get()); + ++globalCmp; + if (IsFirst || iterationBest->GetNormSpillCost() <= TargetNSC) { + UpdatePheromone(iterationBest.get()); + } else + ++globalCmpRej; /* PrintSchedule(iterationBest); */ /* std::cout << iterationBest->GetCost() << std::endl; */ // TODO DRY @@ -475,6 +540,9 @@ FUNC_RESULT ACOScheduler::FindSchedule(InstSchedule *schedule_out, iterations++; } + Logger::Info("localCmp:%d,localCmpRej:%d,globalCmp:%d,globalCmpRej:%d", + localCmp, localCmpRej, globalCmp, globalCmpRej); + Logger::Event(IsPostBB ? 
"AcoPostSchedComplete" : "ACOSchedComplete", "cost", bestSchedule->GetCost(), "iterations", iterations, "improvement", InitialCost - bestSchedule->GetCost()); diff --git a/lib/Scheduler/bb_spill.cpp b/lib/Scheduler/bb_spill.cpp index d22b02d7..53a50975 100644 --- a/lib/Scheduler/bb_spill.cpp +++ b/lib/Scheduler/bb_spill.cpp @@ -56,6 +56,7 @@ BBWithSpill::BBWithSpill(const OptSchedTarget *OST_, DataDepGraph *dataDepGraph, SCW_ = SCW; schedCostFactor_ = COST_WGHT_BASE; trackLiveRangeLngths_ = true; + NeedsComputeSLIL = (spillCostFunc == SCF_SLIL); regTypeCnt_ = OST->MM->GetRegTypeCnt(); regFiles_ = dataDepGraph->getRegFiles(); @@ -110,6 +111,11 @@ ConstrainedScheduler *BBWithSpill::AllocHeuristicScheduler_() { abslutSchedUprBound_, GetHeuristicPriorities()); break; + case SCHED_STALLING_LIST: + return new StallSchedulingListScheduler(dataDepGraph_, machMdl_, + abslutSchedUprBound_, + GetHeuristicPriorities()); + break; } llvm_unreachable("Unknown heuristic scheduler type!"); } @@ -331,6 +337,7 @@ InstCount BBWithSpill::cmputSpillCostLwrBound() { /*****************************************************************************/ void BBWithSpill::addRecordedCost(SPILL_COST_FUNCTION Scf) { + NeedsComputeSLIL |= (Scf == SCF_SLIL); if (!llvm::is_contained(recordedCostFunctions, Scf)) recordedCostFunctions.push_back(Scf); } @@ -339,6 +346,12 @@ void BBWithSpill::addRecordedCost(SPILL_COST_FUNCTION Scf) { void BBWithSpill::storeExtraCost(InstSchedule *sched, SPILL_COST_FUNCTION Scf) { sched->SetExtraSpillCost(Scf, CmputCostForFunction(Scf)); } + +/*****************************************************************************/ + +InstCount BBWithSpill::getUnnormalizedIncrementalRPCost() const { + return crntSpillCost_; +} /*****************************************************************************/ void BBWithSpill::InitForSchdulng() { @@ -486,7 +499,7 @@ void BBWithSpill::UpdateSpillInfoForSchdul_(SchedInstruction *inst, // (Chris): The SLIL calculation below the def and use 
for-loops doesn't // consider the last use of a register. Thus, an additional increment must // happen here. - if (GetSpillCostFunc() == SCF_SLIL) { + if (needsSLIL()) { sumOfLiveIntervalLengths_[regType]++; if (!use->IsInInterval(inst) && !use->IsInPossibleInterval(inst)) { ++dynamicSlilLowerBound_; @@ -558,7 +571,7 @@ void BBWithSpill::UpdateSpillInfoForSchdul_(SchedInstruction *inst, peakRegPressures_[i] = liveRegs; // (Chris): Compute sum of live range lengths at this point - if (GetSpillCostFunc() == SCF_SLIL) { + if (needsSLIL()) { sumOfLiveIntervalLengths_[i] += liveRegs_[i].GetOneCnt(); for (int j = 0; j < liveRegs_[i].GetSize(); ++j) { if (liveRegs_[i].GetBit(j)) { @@ -619,7 +632,7 @@ void BBWithSpill::UpdateSpillInfoForUnSchdul_(SchedInstruction *inst) { #endif // (Chris): Update the SLIL for all live regs at this point. - if (GetSpillCostFunc() == SCF_SLIL) { + if (needsSLIL()) { for (int i = 0; i < regTypeCnt_; ++i) { for (int j = 0; j < liveRegs_[i].GetSize(); ++j) { if (liveRegs_[i].GetBit(j)) { @@ -678,7 +691,7 @@ void BBWithSpill::UpdateSpillInfoForUnSchdul_(SchedInstruction *inst) { if (isLive == false) { // (Chris): Since this was the last use, the above SLIL calculation didn't // take this instruction into account. 
- if (GetSpillCostFunc() == SCF_SLIL) { + if (needsSLIL()) { sumOfLiveIntervalLengths_[regType]--; if (!use->IsInInterval(inst) && !use->IsInPossibleInterval(inst)) { --dynamicSlilLowerBound_; @@ -852,11 +865,7 @@ FUNC_RESULT BBWithSpill::Enumerate_(Milliseconds startTime, } /*****************************************************************************/ -// can only compute SLIL if SLIL was the spillCostFunc InstCount BBWithSpill::CmputCostForFunction(SPILL_COST_FUNCTION SpillCF) { - // assert that if we are asking for SLIL that the CF is SLIL - assert(SpillCF != SCF_SLIL || GetSpillCostFunc() == SCF_SLIL); - // return the requested cost switch (SpillCF) { case SCF_TARGET: @@ -957,6 +966,8 @@ void BBWithSpill::UpdtOptmlSchedWghtd(InstSchedule *crntSched, /*****************************************************************************/ +bool BBWithSpill::needsSLIL() const { return NeedsComputeSLIL; } + void BBWithSpill::SetupForSchdulng_() { for (int i = 0; i < regTypeCnt_; i++) { liveRegs_[i].Construct(regFiles_[i].GetRegCnt()); diff --git a/lib/Scheduler/gen_sched.cpp b/lib/Scheduler/gen_sched.cpp index 45393e01..14e6ad10 100644 --- a/lib/Scheduler/gen_sched.cpp +++ b/lib/Scheduler/gen_sched.cpp @@ -107,6 +107,12 @@ bool ConstrainedScheduler::Initialize_(InstCount trgtSchedLngth, return false; } + // wipe the ready list per cycle + for (InstCount i = 0; i < schedUprBound_; ++i) { + if (frstRdyLstPerCycle_[i]) + frstRdyLstPerCycle_[i]->Reset(); + } + // Allocate the first entry in the array. 
if (frstRdyLstPerCycle_[0] == NULL) { frstRdyLstPerCycle_[0] = new LinkedList; @@ -126,6 +132,9 @@ bool ConstrainedScheduler::Initialize_(InstCount trgtSchedLngth, crntSlotNum_ = 0; crntRealSlotNum_ = 0; crntCycleNum_ = 0; + isCrntCycleBlkd_ = false; + consecEmptyCycles_ = 0; + InitNewCycle_(); rgn_->InitForSchdulng(); diff --git a/lib/Scheduler/list_sched.cpp b/lib/Scheduler/list_sched.cpp index c5ecb0ca..8c9d854d 100644 --- a/lib/Scheduler/list_sched.cpp +++ b/lib/Scheduler/list_sched.cpp @@ -157,3 +157,36 @@ void ListScheduler::UpdtRdyLst_(InstCount cycleNum, int slotNum) { lst2->Reset(); } } + +StallSchedulingListScheduler::StallSchedulingListScheduler( + DataDepGraph *dataDepGraph, MachineModel *machMdl, InstCount schedUprBound, + SchedPriorities prirts) + : ListScheduler(dataDepGraph, machMdl, schedUprBound, prirts) {} + +SchedInstruction *StallSchedulingListScheduler::PickInst() const { + unsigned long CurrentHeuristic; + SchedInstruction *inst = rdyLst_->GetNextPriorityInst(CurrentHeuristic); + + // now inst stores the latency ready instruction w/ the best heuristic + for (InstCount fCycle = 1; fCycle < dataDepGraph_->GetMaxLtncy() && + crntCycleNum_ + fCycle < schedUprBound_; + ++fCycle) { + LinkedList *futureReady = + frstRdyLstPerCycle_[crntCycleNum_ + fCycle]; + if (!futureReady) + continue; + + for (SchedInstruction *fIns = futureReady->GetFrstElmnt(); fIns; + fIns = futureReady->GetNxtElmnt()) { + bool Changed; + unsigned long Heuristic = rdyLst_->CmputKey_(fIns, false, Changed); + if (Heuristic > CurrentHeuristic) { + futureReady->ResetIterator(); + return nullptr; + } + } + futureReady->ResetIterator(); + } + + return ChkInstLglty_(inst) ? 
inst : nullptr; +} diff --git a/lib/Scheduler/sched_region.cpp b/lib/Scheduler/sched_region.cpp index 11374751..478c3cdd 100644 --- a/lib/Scheduler/sched_region.cpp +++ b/lib/Scheduler/sched_region.cpp @@ -245,8 +245,7 @@ FUNC_RESULT SchedRegion::FindOptimalSchedule( stats::problemSize.Record(dataDepGraph_->GetInstCnt()); const auto *GraphTransformations = dataDepGraph_->GetGraphTrans(); - if (BbSchedulerEnabled || GraphTransformations->size() > 0 || - spillCostFunc_ == SCF_SLIL) + if (BbSchedulerEnabled || GraphTransformations->size() > 0 || needsSLIL()) needTransitiveClosure = true; rslt = dataDepGraph_->SetupForSchdulng(needTransitiveClosure); diff --git a/lib/Wrapper/OptimizingScheduler.cpp b/lib/Wrapper/OptimizingScheduler.cpp index 984c51ef..49ea6d79 100644 --- a/lib/Wrapper/OptimizingScheduler.cpp +++ b/lib/Wrapper/OptimizingScheduler.cpp @@ -169,6 +169,8 @@ static SchedulerType parseListSchedType() { return SCHED_LIST; if (SchedTypeString == "SEQ") return SCHED_SEQ; + if (SchedTypeString == "STALLING_LIST") + return SCHED_STALLING_LIST; llvm::report_fatal_error( "Unrecognized option for HEUR_SCHED_TYPE: " + SchedTypeString, false); @@ -863,7 +865,8 @@ void ScheduleDAGOptSched::scheduleOptSchedMinRP() { // Set times for the first pass RegionTimeout = FirstPassRegionTimeout; LengthTimeout = FirstPassLengthTimeout; - HeurSchedType = SCHED_LIST; + if (HeurSchedType == SCHED_SEQ) + HeurSchedType = SCHED_LIST; // Disable relaxed scheduling pruning since we already know what the minimum // length should be in the occupancy pass