-
Notifications
You must be signed in to change notification settings - Fork 12.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Swap ports 10 and 11 in SapphireRapids Scheduling Model #117468
base: main
Are you sure you want to change the base?
[X86] Swap ports 10 and 11 in SapphireRapids Scheduling Model #117468
Conversation
Based on intel/perfmon#149, the documentation is incorrect and the pfm counter names are actually correct. This patch adjusts the SapphireRapids scheduling model to match the performance counter naming/ correct naming that will soon be reflected in the optimization manual. This fixes part of llvm#117360.
@llvm/pr-subscribers-backend-x86 Author: Aiden Grossman (boomanaiden154) ChangesBased on intel/perfmon#149, the documentation is incorrect and the pfm counter names are actually correct. This patch adjusts the SapphireRapids scheduling model to match the performance counter naming/ correct naming that will soon be reflected in the optimization manual. This fixes part of #117360. Patch is 2.05 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117468.diff 66 Files Affected:
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 0c80f1eaadadb8..c15a2adc590f58 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -229,10 +229,7 @@ def SapphireRapidsPfmCounters : ProcPfmCounters {
let IssueCounters = [
PfmIssueCounter<"SPRPort00", "uops_dispatched:port_0">,
PfmIssueCounter<"SPRPort01", "uops_dispatched:port_1">,
- // The perfmon documentation and thus libpfm seems to incorrectly label
- // this performance counter, as ports 2,3, and 11 are actually grouped
- // according to most documentation. See #113941 for additional details.
- PfmIssueCounter<"SPRPort02_03_11", "uops_dispatched:port_2_3_10">,
+ PfmIssueCounter<"SPRPort02_03_10", "uops_dispatched:port_2_3_10">,
PfmIssueCounter<"SPRPort04_09", "uops_dispatched:port_4_9">,
PfmIssueCounter<"SPRPort05_11", "uops_dispatched:port_5_11">,
PfmIssueCounter<"SPRPort06", "uops_dispatched:port_6">,
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 8a23d1b103aa6b..e04ff68d278b2b 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -56,15 +56,15 @@ def SPRPort00_05 : ProcResGroup<[SPRPort00, SPRPort05]>;
def SPRPort00_05_06 : ProcResGroup<[SPRPort00, SPRPort05, SPRPort06]>;
def SPRPort00_06 : ProcResGroup<[SPRPort00, SPRPort06]>;
def SPRPort01_05 : ProcResGroup<[SPRPort01, SPRPort05]>;
-def SPRPort01_05_10 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort10]>;
+def SPRPort01_05_11 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort11]>;
def SPRPort02_03 : ProcResGroup<[SPRPort02, SPRPort03]>;
-def SPRPort02_03_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort11]>;
+def SPRPort02_03_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort10]>;
def SPRPort05_11 : ProcResGroup<[SPRPort05, SPRPort11]>;
def SPRPort07_08 : ProcResGroup<[SPRPort07, SPRPort08]>;
// EU has 112 reservation stations.
-def SPRPort00_01_05_06_10 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05,
- SPRPort06, SPRPort10]> {
+def SPRPort00_01_05_06_11 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05,
+ SPRPort06, SPRPort11]> {
let BufferSize = 112;
}
@@ -74,8 +74,8 @@ def SPRPort04_09 : ProcResGroup<[SPRPort04, SPRPort09]> {
}
// MEM has 72 reservation stations.
-def SPRPort02_03_07_08_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07,
- SPRPort08, SPRPort11]> {
+def SPRPort02_03_07_08_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07,
+ SPRPort08, SPRPort10]> {
let BufferSize = 72;
}
@@ -113,7 +113,7 @@ multiclass SPRWriteResPair<X86FoldableSchedWrite SchedRW,
// Memory variant also uses a cycle on port 2/3/11 and adds LoadLat cycles to
// the latency (default = 5).
- def : WriteRes<SchedRW.Folded, !listconcat([SPRPort02_03_11], ExePorts)> {
+ def : WriteRes<SchedRW.Folded, !listconcat([SPRPort02_03_10], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ReleaseAtCycles = !listconcat([1], Res);
let NumMicroOps = !add(UOps, LoadUOps);
@@ -126,71 +126,71 @@ multiclass SPRWriteResPair<X86FoldableSchedWrite SchedRW,
// Infered SchedWrite definition.
def : WriteRes<WriteADC, [SPRPort00_06]>;
-defm : X86WriteRes<WriteADCLd, [SPRPort00_01_05_06_10, SPRPort00_06], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteADCLd, [SPRPort00_01_05_06_11, SPRPort00_06], 11, [1, 1], 2>;
defm : SPRWriteResPair<WriteAESDecEnc, [SPRPort00_01], 5, [1], 1, 7>;
defm : SPRWriteResPair<WriteAESIMC, [SPRPort00_01], 8, [2], 2, 7>;
defm : X86WriteRes<WriteAESKeyGen, [SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort05], 7, [4, 1, 1, 2, 3, 3], 14>;
-defm : X86WriteRes<WriteAESKeyGenLd, [SPRPort00, SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort02_03_11, SPRPort05], 12, [4, 1, 2, 3, 1, 3], 14>;
-def : WriteRes<WriteALU, [SPRPort00_01_05_06_10]>;
-def : WriteRes<WriteALULd, [SPRPort00_01_05_06_10]> {
+defm : X86WriteRes<WriteAESKeyGenLd, [SPRPort00, SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort02_03_10, SPRPort05], 12, [4, 1, 2, 3, 1, 3], 14>;
+def : WriteRes<WriteALU, [SPRPort00_01_05_06_11]>;
+def : WriteRes<WriteALULd, [SPRPort00_01_05_06_11]> {
let Latency = 11;
}
defm : SPRWriteResPair<WriteBEXTR, [SPRPort00_06, SPRPort01], 6, [1, 1], 2>;
-defm : SPRWriteResPair<WriteBLS, [SPRPort01_05_10], 2, [1]>;
+defm : SPRWriteResPair<WriteBLS, [SPRPort01_05_11], 2, [1]>;
defm : SPRWriteResPair<WriteBSF, [SPRPort01], 3, [1]>;
defm : SPRWriteResPair<WriteBSR, [SPRPort01], 3, [1]>;
def : WriteRes<WriteBSWAP32, [SPRPort01]>;
defm : X86WriteRes<WriteBSWAP64, [SPRPort00_06, SPRPort01], 2, [1, 1], 2>;
defm : SPRWriteResPair<WriteBZHI, [SPRPort01], 3, [1]>;
def : WriteRes<WriteBitTest, [SPRPort01]>;
-defm : X86WriteRes<WriteBitTestImmLd, [SPRPort01, SPRPort02_03_11], 6, [1, 1], 2>;
-defm : X86WriteRes<WriteBitTestRegLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11], 11, [4, 2, 1, 2, 1], 10>;
+defm : X86WriteRes<WriteBitTestImmLd, [SPRPort01, SPRPort02_03_10], 6, [1, 1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11, SPRPort02_03_10], 11, [4, 2, 1, 2, 1], 10>;
def : WriteRes<WriteBitTestSet, [SPRPort01]>;
def : WriteRes<WriteBitTestSetImmLd, [SPRPort01]> {
let Latency = 11;
}
-defm : X86WriteRes<WriteBitTestSetRegLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10], 17, [3, 2, 1, 2], 8>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort01_05_11], 17, [3, 2, 1, 2], 8>;
defm : SPRWriteResPair<WriteBlend, [SPRPort01_05], 1, [1], 1, 7>;
defm : SPRWriteResPair<WriteBlendY, [SPRPort00_01_05], 1, [1], 1, 8>;
defm : SPRWriteResPair<WriteCLMul, [SPRPort05], 3, [1], 1, 7>;
defm : SPRWriteResPair<WriteCMOV, [SPRPort00_06], 1, [1], 1, 6>;
-defm : X86WriteRes<WriteCMPXCHG, [SPRPort00_01_05_06_10, SPRPort00_06], 3, [3, 2], 5>;
-defm : X86WriteRes<WriteCMPXCHGRMW, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 2, 1, 1, 1], 6>;
+defm : X86WriteRes<WriteCMPXCHG, [SPRPort00_01_05_06_11, SPRPort00_06], 3, [3, 2], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort02_03_10, SPRPort04_09, SPRPort07_08], 12, [1, 2, 1, 1, 1], 6>;
defm : SPRWriteResPair<WriteCRC32, [SPRPort01], 3, [1]>;
defm : X86WriteRes<WriteCvtI2PD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtI2PDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2PDLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : X86WriteRes<WriteCvtI2PDY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtI2PDYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtI2PDZ, [SPRPort00], 4, [1], 1, 8>;
defm : SPRWriteResPair<WriteCvtI2PS, [SPRPort00_01], 4, [1], 1, 7>;
defm : SPRWriteResPair<WriteCvtI2PSY, [SPRPort00_01], 4, [1], 1, 8>;
defm : SPRWriteResPair<WriteCvtI2PSZ, [SPRPort00], 4, [1], 1, 8>;
defm : X86WriteRes<WriteCvtI2SD, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtI2SDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2SDLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : X86WriteRes<WriteCvtI2SS, [SPRPort00_01, SPRPort00_01_05, SPRPort05], 9, [1, 1, 1], 3>;
-defm : X86WriteRes<WriteCvtI2SSLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2SSLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPD2I, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPD2ILd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2ILd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPD2IY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPD2IYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2IYLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPD2IZ, [SPRPort00, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPD2IZLd, [SPRPort00, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2IZLd, [SPRPort00, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtPD2PS, [SPRPort00_01, SPRPort05], 5, [1, 1], 2, 7>;
defm : SPRWriteResPair<WriteCvtPD2PSY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2, 8>;
defm : SPRWriteResPair<WriteCvtPD2PSZ, [SPRPort00, SPRPort05], 7, [1, 1], 2, 8>;
defm : X86WriteRes<WriteCvtPH2PS, [SPRPort00_01, SPRPort05], 6, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SPRPort00_01, SPRPort05], 8, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtPH2PSZ, [SPRPort00, SPRPort05], 11, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtPS2I, [SPRPort00_01], 4, [1], 1, 7>;
defm : SPRWriteResPair<WriteCvtPS2IY, [SPRPort00_01], 4, [1], 1, 8>;
defm : X86WriteRes<WriteCvtPS2IZ, [SPRPort00, SPRPort00_05, SPRPort05], 10, [1, 2, 1], 4>;
-defm : X86WriteRes<WriteCvtPS2IZLd, [SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05], 18, [1, 2, 1, 1, 1], 6>;
+defm : X86WriteRes<WriteCvtPS2IZLd, [SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_10, SPRPort05], 18, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteCvtPS2PD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PDY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDYLd, [SPRPort00_01, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtPS2PDZ, [SPRPort00, SPRPort05], 7, [1, 1], 2, 6>;
defm : X86WriteRes<WriteCvtPS2PH, [SPRPort00_01, SPRPort05], 6, [1, 1], 2>;
defm : X86WriteRes<WriteCvtPS2PHSt, [SPRPort00_01, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1], 3>;
@@ -202,12 +202,12 @@ defm : SPRWriteResPair<WriteCvtSD2I, [SPRPort00, SPRPort00_01], 7, [1, 1], 2>;
defm : SPRWriteResPair<WriteCvtSD2SS, [SPRPort00_01, SPRPort05], 5, [1, 1], 2, 7>;
defm : SPRWriteResPair<WriteCvtSS2I, [SPRPort00, SPRPort00_01], 7, [1, 1], 2>;
defm : X86WriteRes<WriteCvtSS2SD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
-defm : X86WriteRes<WriteCvtSS2SDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtSS2SDLd, [SPRPort00_01, SPRPort02_03_10], 11, [1, 1], 2>;
defm : SPRWriteResPair<WriteDPPD, [SPRPort00_01, SPRPort01_05], 9, [2, 1], 3, 7>;
defm : SPRWriteResPair<WriteDPPS, [SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort05], 14, [2, 1, 2, 1], 6, 7>;
defm : SPRWriteResPair<WriteDPPSY, [SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort05], 14, [2, 1, 2, 1], 6, 8>;
-defm : SPRWriteResPair<WriteDiv16, [SPRPort00_01_05_06_10, SPRPort01], 16, [1, 3], 4, 4>;
-defm : SPRWriteResPair<WriteDiv32, [SPRPort00_01_05_06_10, SPRPort01], 15, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteDiv16, [SPRPort00_01_05_06_11, SPRPort01], 16, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteDiv32, [SPRPort00_01_05_06_11, SPRPort01], 15, [1, 3], 4, 4>;
defm : SPRWriteResPair<WriteDiv64, [SPRPort01], 18, [3], 3>;
defm : X86WriteRes<WriteDiv8, [SPRPort01], 17, [3], 3>;
defm : X86WriteRes<WriteDiv8Ld, [SPRPort01], 22, [3], 3>;
@@ -235,7 +235,7 @@ defm : SPRWriteResPair<WriteFCmpY, [SPRPort00_01], 4, [1], 1, 8>;
def : WriteRes<WriteFCmpZ, [SPRPort05]> {
let Latency = 3;
}
-defm : X86WriteRes<WriteFCmpZLd, [SPRPort00, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteFCmpZLd, [SPRPort00, SPRPort02_03_10], 12, [1, 1], 2>;
defm : SPRWriteResPair<WriteFCom, [SPRPort05], 1, [1], 1, 7>;
defm : SPRWriteResPair<WriteFComX, [SPRPort00], 3, [1]>;
defm : SPRWriteResPair<WriteFDiv, [SPRPort00], 11, [1], 1, 7>;
@@ -251,13 +251,13 @@ defm : SPRWriteResPair<WriteFHAddY, [SPRPort01_05, SPRPort05], 5, [1, 2], 3, 8>;
def : WriteRes<WriteFLD0, [SPRPort00_05]>;
defm : X86WriteRes<WriteFLD1, [SPRPort00_05], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [SPRPort00_05], 1, [2], 2>;
-def : WriteRes<WriteFLoad, [SPRPort02_03_11]> {
+def : WriteRes<WriteFLoad, [SPRPort02_03_10]> {
let Latency = 7;
}
-def : WriteRes<WriteFLoadX, [SPRPort02_03_11]> {
+def : WriteRes<WriteFLoadX, [SPRPort02_03_10]> {
let Latency = 7;
}
-def : WriteRes<WriteFLoadY, [SPRPort02_03_11]> {
+def : WriteRes<WriteFLoadY, [SPRPort02_03_10]> {
let Latency = 8;
}
defm : SPRWriteResPair<WriteFLogic, [SPRPort00_01_05], 1, [1], 1, 7>;
@@ -270,8 +270,8 @@ defm : SPRWriteResPair<WriteFMAZ, [SPRPort00], 4, [1], 1, 8>;
def : WriteRes<WriteFMOVMSK, [SPRPort00]> {
let Latency = 3;
}
-defm : X86WriteRes<WriteFMaskedLoad, [SPRPort00_01_05, SPRPort02_03_11], 8, [1, 1], 2>;
-defm : X86WriteRes<WriteFMaskedLoadY, [SPRPort00_01_05, SPRPort02_03_11], 9, [1, 1], 2>;
+defm : X86WriteRes<WriteFMaskedLoad, [SPRPort00_01_05, SPRPort02_03_10], 8, [1, 1], 2>;
+defm : X86WriteRes<WriteFMaskedLoadY, [SPRPort00_01_05, SPRPort02_03_10], 9, [1, 1], 2>;
defm : X86WriteRes<WriteFMaskedStore32, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
defm : X86WriteRes<WriteFMaskedStore32Y, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
defm : X86WriteRes<WriteFMaskedStore64, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
@@ -334,15 +334,15 @@ defm : SPRWriteResPair<WriteFVarShuffleZ, [SPRPort05], 1, [1], 1, 8>;
def : WriteRes<WriteFence, [SPRPort00_06]> {
let Latency = 2;
}
-defm : SPRWriteResPair<WriteIDiv16, [SPRPort00_01_05_06_10, SPRPort01], 16, [1, 3], 4, 4>;
-defm : SPRWriteResPair<WriteIDiv32, [SPRPort00_01_05_06_10, SPRPort01], 15, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteIDiv16, [SPRPort00_01_05_06_11, SPRPort01], 16, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteIDiv32, [SPRPort00_01_05_06_11, SPRPort01], 15, [1, 3], 4, 4>;
defm : SPRWriteResPair<WriteIDiv64, [SPRPort01], 18, [3], 3>;
defm : X86WriteRes<WriteIDiv8, [SPRPort01], 17, [3], 3>;
defm : X86WriteRes<WriteIDiv8Ld, [SPRPort01], 22, [3], 3>;
-defm : SPRWriteResPair<WriteIMul16, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 5, [2, 1, 1], 4>;
-defm : SPRWriteResPair<WriteIMul16Imm, [SPRPort00_01_05_06_10, SPRPort01], 4, [1, 1], 2>;
+defm : SPRWriteResPair<WriteIMul16, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 5, [2, 1, 1], 4>;
+defm : SPRWriteResPair<WriteIMul16Imm, [SPRPort00_01_05_06_11, SPRPort01], 4, [1, 1], 2>;
defm : SPRWriteResPair<WriteIMul16Reg, [SPRPort01], 3, [1]>;
-defm : SPRWriteResPair<WriteIMul32, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 3>;
+defm : SPRWriteResPair<WriteIMul32, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 3>;
defm : SPRWriteResPair<WriteIMul32Imm, [SPRPort01], 3, [1]>;
defm : SPRWriteResPair<WriteIMul32Reg, [SPRPort01], 3, [1]>;
defm : SPRWriteResPair<WriteIMul64, [SPRPort01, SPRPort05], 4, [1, 1], 2>;
@@ -359,10 +359,10 @@ defm : SPRWriteResPair<WriteJump, [SPRPort00_06], 1, [1]>;
def : WriteRes<WriteLAHFSAHF, [SPRPort00_06]> {
let Latency = 3;
}
-defm : X86WriteRes<WriteLDMXCSR, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11], 7, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteLDMXCSR, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_10], 7, [1, 1, 1, 1], 4>;
def : WriteRes<WriteLEA, [SPRPort01]>;
defm : SPRWriteResPair<WriteLZCNT, [SPRPort01], 3, [1]>;
-def : WriteRes<WriteLoad, [SPRPort02_03_11]> {
+def : WriteRes<WriteLoad, [SPRPort02_03_10]> {
let Latency = 5;
}
def : WriteRes<WriteMMXMOVMSK, [SPRPort00]> {
@@ -370,7 +370,7 @@ def : WriteRes<WriteMMXMOVMSK, [SPRPort00]> {
}
defm : SPRWriteResPair<WriteMPSAD, [SPRPort01_05, SPRPort05], 4, [1, 1], 2, 7>;
defm : SPRWriteResPair<WriteMPSADY, [SPRPort01_05, SPRPort05], 4, [1, 1], 2, 8>;
-defm : SPRWriteResPair<WriteMULX32, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 2>;
+defm : SPRWriteResPair<WriteMULX32, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 2>;
defm : SPRWriteResPair<WriteMULX64, [SPRPort01, SPRPort05], 4, [1, 1]>;
def : WriteRes<WriteMicrocoded, [SPRPort00_01_05_06]> {
let Latency = SapphireRapidsModel.MaxLatency;
@@ -380,9 +380,9 @@ def : WriteRes<WriteMove, [SPRPort00]> {
}
defm : X86WriteRes<WriteNop, [], 1, [], 0>;
defm : X86WriteRes<WritePCmpEStrI, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort05], 16, [3, 2, 1, 1, 1], 8>;
-defm : X86WriteRes<WritePCmpEStrILd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05], 31, [3, 1, 1, 1, 1, 1], 8>;
+defm : X86WriteRes<WritePCmpEStrILd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05], 31, [3, 1, 1, 1, 1, 1], 8>;
defm : X86WriteRes<WritePCmpEStrM, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort05], 16, [3, 3, 1, 1, 1], 9>;
-defm : X86WriteRes<WritePCmpEStrMLd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05], 17, [3, 2, 1, 1, 1, 1], 9>;
+defm : X86WriteRes<WritePCmpEStrMLd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort05], 17, [3, 2, 1, 1, 1, 1], 9>;
defm : SPRWriteResPair<WritePCmpIStrI, [SPRPort00], 11, [3], 3, 20>;
defm : SPRWriteResPair<WritePCmpIStrM, [SPRPort00], 11, [3], 3>;
defm : SPRWriteResPair<WritePHAdd, [SPRPort00_05, SPRPort05], 3, [1, 2], 3, 8>;
@@ -397,16 +397,16 @@ defm : SPRWriteResPair<WritePSADBW, [SPRPort05], 3, [1], 1, 8>;
defm : SPRWriteResPair<WritePSADBWX, [SPRPort05], 3, [1], 1, 7>;
defm : SPRWriteResPair<WritePSADBWY, [SPRPort05], 3, [1], 1, 8>;
defm : SPRWriteResPair<WritePSADBWZ, [SPRPort05], 3, [1], 1, 8>;
-defm : X86WriteRes<WriteRMW, [SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 1, [1, 1, 1], 3>;
-defm : X86WriteRes<WriteRotate, [SPRPort00_01_05_06_10, SPRPort00_06], 2, [1, 2], 3>;
-defm : X86WriteRes<WriteRotateLd, [SPRPort00_01_05_06_10, SPRPort00_06], 12, [1, 2], 3>;
+defm : X86WriteRes<WriteRMW, [SPRPort02_03_10, SPRPort04_09, SPRPort07_08], 1, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteRotate, [SPRPort00_01_05_06_11, SPRPort00_06], 2, [1, 2], 3>;
+defm : X86WriteRes<WriteRotateLd, [SPRPort00_01_05_06_11, SPRPort00_06], 12, [1, 2], 3>;
defm : X86WriteRes<WriteRotateCL, [SPRPort00_06], 2, [2], 2>;
-defm : X86WriteRes<WriteRotateCLLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 19, [2, 3, 2], 7>;
+defm : X86WriteRes<WriteRotateCLLd, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 19, [2, 3, 2], 7>;
defm : X86WriteRes<WriteSETCC, [SPRPort00_06], 2, [2], 2>;
defm : X86WriteRes<WriteSETCCStore, [SPRPort00_06, SPRPort04_09, SPRPort07_08], 13, [2, 1, 1], 4>;
-defm : X86WriteRes<WriteSHDmrcl, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1, 1], 6>;
-defm : X86WriteRes<WriteSHDmri, [SPRPort00_01_05_06_10, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1], 5>;
-defm : X86WriteRes<WriteSHDrrcl, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 5, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteSHDmrcl, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1, 1], 6>;
+defm : X86WriteRes<WriteSHDmri, [SPRPort00_01_05_06_11, SPRPort01, SPRPort02_03_10, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1], 5>;
+defm : X86WriteRes<WriteSHDrrcl, [SPRPort00_01_05_06_11, SPRPort00_06, SPRPort01], 5, [1, 1, 1], 3>;
def : WriteRes<WriteSHDrri, [SPRPort01]> {
let Latency = 3;
}
@@ -434,7 +434,7 @@ defm : SPRWriteResPair<WriteVarBlendY, [SPRPort00_01_05], 1, [1], 1, 8>;
defm : SPRWriteResPair<WriteVarBlendZ, [SPRPort00_05], 1, [...
[truncated]
|
Based on intel/perfmon#149, the documentation is incorrect and the pfm counter names are actually correct. This patch adjusts the SapphireRapids scheduling model to match the performance counter naming/ correct naming that will soon be reflected in the optimization manual.
This fixes part of #117360.