-
Notifications
You must be signed in to change notification settings - Fork 0
/
rob.rs
256 lines (234 loc) · 9.41 KB
/
rob.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
use perfect::*;
use perfect::events::*;
use rand::prelude::*;
use perfect::stats::*;
use perfect::asm::Emitter;
fn main() {
let mut harness = HarnessConfig::default_zen2().emit();
ReorderBufferCapacity::run(&mut harness);
}
/// Measure the capacity of the reorder buffer.
///
/// Context
/// =======
///
/// The reorder buffer (ROB) tracks in-flight instructions that have been
/// dispatched into the back-end of the machine but have not yet retired.
///
/// - In Zen 2, the documented reorder buffer capacity is 224 entries
/// - Presumably, each entry corresponds to a single in-flight macro-op (mop)
/// - Presumably, a NOP instruction occupies a single ROB entry
///
/// Test
/// ====
///
/// According to Family 19h PPRs, the "RetireTokenStall" event (0x0af, 0x20)
/// counts "cycles where a dispatch group is valid but does not get dispatched
/// due to a token stall". Presumably, this means that dispatch is stalled
/// for the availability of a reorder buffer entry.
///
/// 0. Assume that a NOP instruction occupies a reorder buffer entry.
/// Ensure that the reorder buffer is drained placing LFENCE very close
/// to the start of measured code.
///
/// 1. While measuring "retire token" stall cycles, speculatively dispatch
/// `N` NOP instructions [which will not stall for any other resources]
///
/// 2. If we observe stall cycles, the reorder buffer capacity must be at least
/// `N` entries.
///
/// Alternatively, we can use the PMC event for "speculatively-dispatched ops"
/// to determine the capacity:
///
/// 0. Assume that a NOP instruction occupies a reorder buffer entry.
///
/// 1. While measuring speculatively dispatched FP ops, speculatively dispatch
/// `N` NOP instructions [which will not stall for other resources], and
/// then a single FNOP instruction.
///
/// 2. If we observe that FNOP has been speculatively dispatched, the reorder
/// buffer capacity must be at least `N+1` entries.
///
///
/// Results
/// =======
///
/// When measuring with the stall cycles, we needed to emit a serializing
/// instruction [to be speculatively dispatched] immediately after the string
/// of tested instructions. Otherwise, no stall cycles would be recorded.
///
/// It seems like using MFENCE instead of LFENCE causes us to measure a couple
/// more stall cycles, but I don't exactly understand why:
///
/// - With LFENCE, stall cycles occur after 217 1-mop instructions
/// - With LFENCE, stall cycles occur after 108 2-mop instructions
///
/// - With MFENCE, stall cycles occur after 221 1-mop instructions
/// - With MFENCE, stall cycles occur after 110 2-mop instructions
///
/// When measuring for the FNOP marker (and MFENCE), we observe that:
///
/// - FNOP is not dispatched after 222 1-mop instructions
/// - FNOP is not dispatched after 111 2-mop instructions
///
pub struct ReorderBufferCapacity;
impl MispredictedReturnTemplate<usize> for ReorderBufferCapacity {}
impl ReorderBufferCapacity {
const CASES: StaticEmitterCases<usize> = StaticEmitterCases::new(&[
EmitterDesc { desc: "nop [1 mop]; lfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; nop) }
dynasm!(f; lfence);
}},
EmitterDesc { desc: "nop15 [1 mop]; lfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; .bytes NOP15) }
dynasm!(f; lfence);
}},
EmitterDesc { desc: "xchg rax,rbx [2 mops]; lfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; xchg rax,rbx) }
dynasm!(f; lfence);
}},
EmitterDesc { desc: "nop [1 mop]; mfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; nop) }
dynasm!(f; mfence);
}},
EmitterDesc { desc: "nop15 [1 mop]; mfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; .bytes NOP15) }
dynasm!(f; mfence);
}},
EmitterDesc { desc: "xchg rax,rbx [2 mops]; mfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; xchg rax,rbx) }
dynasm!(f; mfence);
}},
EmitterDesc { desc: "nop [1 mop]; fnop; mfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; nop) }
dynasm!(f; fnop; mfence );
}},
EmitterDesc { desc: "nop15 [1 mop]; fnop; mfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; .bytes NOP15) }
dynasm!(f; fnop; mfence);
}},
EmitterDesc { desc: "xchg rax,rbx [2 mops]; fnop; mfence",
func: |f, input| {
if input == 0 { return; }
for _ in 0..=input { dynasm!(f ; xchg rax,rbx) }
dynasm!(f; fnop; mfence);
}},
]);
fn parse_results(exp_results: &ExperimentResults<Zen2Event, usize>) {
pub enum Strat { NonZero, Zero, None }
for case_results in exp_results.data.iter() {
println!("[*] Test case '{}'", case_results.desc);
for (event, event_results) in case_results.data.iter() {
let edesc = event.as_desc();
let minmax = event_results.local_minmax();
let (gmin, min_idx) = event_results.global_min();
let (gmax, max_idx) = event_results.global_max();
let strat = match event {
Zen2Event::Dsp0Stall(0x2) |
Zen2Event::DeDisDispatchTokenStalls0(
DeDisDispatchTokenStalls0Mask::RetireTokenStall
) => Strat::NonZero,
Zen2Event::DeDisOpsFromDecoder(DeDisOpsFromDecoderMask::Fp)
=> Strat::Zero,
_ => Strat::None,
};
match strat {
// Find the first test where the minimum observed value
// is non-zero
Strat::NonZero => {
let limit = minmax.iter().enumerate()
.filter(|(idx,x)| x.0 > 0 && *idx != 0)
.next()
.unwrap_or((0, &(0, 0)));
println!("limit={:4} {:03x}:{:02x} {}",
limit.0,
edesc.id(), edesc.mask(),
edesc.name()
);
},
Strat::Zero => {
let limit = minmax.iter().enumerate()
.filter(|(idx,x)| x.0 == 0 && *idx != 0)
.next()
.unwrap_or((0, &(0, 0)));
println!("limit={:4} {:03x}:{:02x} {}",
limit.0,
edesc.id(), edesc.mask(),
edesc.name()
);
},
Strat::None => {
println!("min={:4} (idx={}) max={:4} (idx={}) {:03x}:{:02x} {}",
gmin,
min_idx,
gmax,
max_idx,
edesc.id(), edesc.mask(),
edesc.name()
);
},
}
}
println!();
}
}
fn run(harness: &mut PerfectHarness) {
let mut events = EventSet::new();
// Stall cycles
events.add(Zen2Event::DeDisDispatchTokenStalls0(
DeDisDispatchTokenStalls0Mask::RetireTokenStall
));
// NOTE: It seems like this coincides with the retire stall event?
// (What is this even?)
events.add(Zen2Event::Dsp0Stall(0x2));
// Speculatively-dispatched FP ops
events.add(Zen2Event::DeDisOpsFromDecoder(
DeDisOpsFromDecoderMask::Fp
));
let mut opts = MispredictedReturnOptions::zen2_defaults()
.rdpmc_strat(RdpmcStrategy::Gpr(Gpr::R15));
opts.speculative_epilogue_fn = Some(|f, input| {
dynasm!(f
);
});
// Measure all of the testcases
let mut exp_results = ExperimentResults::new();
for testcase in Self::CASES.iter() {
let mut case_res = ExperimentCaseResults::new(testcase.desc);
for input in 0..=256 {
let asm = Self::emit(opts, input, testcase.func);
let asm_reader = asm.reader();
let asm_tgt_buf = asm_reader.lock();
let asm_tgt_ptr = asm_tgt_buf.ptr(AssemblyOffset(0));
let asm_fn: MeasuredFn = unsafe {
std::mem::transmute(asm_tgt_ptr)
};
for event in events.iter() {
let desc = event.as_desc();
let results = harness.measure(asm_fn,
desc.id(), desc.mask(), 256, InputMethod::Fixed(0, 0)
).unwrap();
case_res.record(*event, input, results);
}
}
exp_results.push(case_res.clone());
}
Self::parse_results(&exp_results);
}
}