-
Notifications
You must be signed in to change notification settings - Fork 0
/
ldq.rs
206 lines (171 loc) · 6.63 KB
/
ldq.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
use perfect::*;
use perfect::events::*;
use rand::prelude::*;
use perfect::stats::*;
use perfect::asm::Emitter;
/// Entry point: build a harness from the default Zen 2 configuration and run
/// the load queue capacity experiment on it.
fn main() {
    let mut zen2_harness = HarnessConfig::default_zen2().emit();
    LoadQueueCapacity::run(&mut zen2_harness);
}
/// Create load queue pressure.
///
/// Explanation
/// ===========
///
/// NOTE(review): this section previously described the *store* queue (it
/// said the queue tracks "recent stores" and cited a 48-entry store queue
/// capacity from the Family 17h SOG), which looks like a leftover from a
/// store queue experiment. The documented load queue capacity is not stated
/// here - TODO confirm against the SOG.
///
/// Test
/// ====
///
/// Execute many loads (or prefetches) using the same memory operand and
/// measure stall cycles (with 'LoadQueueRsrcStall'). Presumably, once the
/// load queue capacity is exceeded, the number of stall cycles becomes
/// nonzero.
///
/// Results
/// =======
///
/// NOTE(review): the previously recorded result ("stall cycles observed when
/// we perform more than 48 stores") refers to stores, not loads. Results for
/// this experiment still need to be recorded.
///
pub struct LoadQueueCapacity;
// The body is empty, so every item of the trait comes from its defaults.
// `usize` matches the input type of the emitters in `CASES` (the value that
// controls how many instructions are generated).
// NOTE(review): presumably this trait is what provides `Self::emit`, which
// `run` calls - confirm against the trait definition.
impl MispredictedReturnTemplate<usize> for LoadQueueCapacity {}
impl LoadQueueCapacity {
    /// Instruction sequences to test.
    ///
    /// Each emitter writes `input + 1` copies of its instruction (note the
    /// inclusive `0..=input` range), and every copy uses the same memory
    /// operand `[0x1000]`.
    const CASES: StaticEmitterCases<usize> = StaticEmitterCases::new(&[
        EmitterDesc { desc: "mov r64, [imm]",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; mov Rq(0), [0x1000]); }
        }},
        EmitterDesc { desc: "mov r32, [imm]",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; mov Rd(0), [0x1000]); }
        }},
        EmitterDesc { desc: "mov r16, [imm]",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; mov Rw(0), [0x1000]); }
        }},
        EmitterDesc { desc: "prefetch [imm]",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; prefetch [0x1000]); }
        }},
        EmitterDesc { desc: "prefetchnta [imm]",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; prefetchnta [0x1000]); }
        }},
    ]);

    // Earlier hand-rolled version of this experiment, kept commented out for
    // reference (it lists instruction variants that `CASES` does not cover).
    //pub fn emit(num_loads: usize) -> X64Assembler {
    // let mut rng = rand::thread_rng();
    // let mut f = X64Assembler::new().unwrap();
    // let mut addrs: Vec<i32> = (0x0001_0000..=0x0002_1000)
    // .step_by(64).collect();
    // assert!(num_loads < addrs.len());
    // addrs.shuffle(&mut rng);
    // for addr in &addrs[0..=num_loads] {
    // dynasm!(f
    // ; mov rax, *addr as _
    // ; clflush [rax]
    // );
    // }
    // dynasm!(f
    // ; mov rax, 0x1111_dead_1111_dead
    // ; vmovq xmm0, rax
    // ; vpbroadcastq ymm0, xmm0
    // ; lfence
    // ; .align 4096
    // ; lfence
    // ; mfence
    // );
    // f.emit_rdpmc_start(0, Gpr::R15 as u8);
    // for addr in &addrs[0..=num_loads] {
    // dynasm!(f ; mov rax, [*addr]); // 8B
    // //dynasm!(f ; mov eax, [*addr]); // 4B
    // //dynasm!(f ; mov ax, [*addr]); // 2B
    // //dynasm!(f ; mov al, [*addr]); // 1B
    // //dynasm!(f ; movnti rax, [*addr]);
    // //dynasm!(f ; vmovd xmm0, [*addr]); // 4B
    // //dynasm!(f ; vmovq xmm0, [*addr]); // 8B
    // //dynasm!(f ; vmovdqa xmm0, [*addr]); // 16B
    // //dynasm!(f ; vmovdqa ymm0, [*addr]); // 32B
    // }
    // f.emit_rdpmc_end(0, Gpr::R15 as u8, Gpr::Rax as u8);
    // f.emit_ret();
    // f.commit().unwrap();
    // f
    //}
    //pub fn run(harness: &mut PerfectHarness) {
    // let mut events = EventSet::new();
    // //events.add(Zen2Event::LsDispatch(LsDispatchMask::LdDispatch));
    // events.add(Zen2Event::DeDisDispatchTokenStalls1(
    // DeDisDispatchTokenStalls1Mask::LoadQueueRsrcStall
    // ));
    // for num_loads in 0..=128 {
    // let asm = Self::emit(num_loads);
    // let asm_reader = asm.reader();
    // let asm_tgt_buf = asm_reader.lock();
    // let asm_tgt_ptr = asm_tgt_buf.ptr(AssemblyOffset(0));
    // let asm_fn: MeasuredFn = unsafe {
    // std::mem::transmute(asm_tgt_ptr)
    // };
    // println!("[*] num_loads={}", num_loads);
    // for event in events.iter() {
    // let desc = event.as_desc();
    // let results = harness.measure(asm_fn,
    // desc.id(), desc.mask(), 128, InputMethod::Fixed(0, 0),
    // ).unwrap();
    // let dist = results.get_distribution();
    // let min = results.get_min();
    // let max = results.get_max();
    // println!(" {:03x}:{:02x} {:032} min={:3} max={:3}",
    // desc.id(), desc.mask(), desc.name(), min, max);
    // }
    // }
    //}

    /// Run every case in `CASES` and report where load queue stalls begin.
    ///
    /// For each case and each `input` in `0..=128`, the code emitted for that
    /// input is measured once per event in the event set (currently only
    /// `LoadQueueRsrcStall`) and the results are recorded per case.
    ///
    /// Afterwards, one summary line is printed per case and event. `limit` is
    /// the first index in `local_minmax()` whose minimum is nonzero, or
    /// `none` if no entry had a nonzero minimum.
    ///
    /// # Panics
    ///
    /// Panics if `harness.measure` returns an error.
    fn run(harness: &mut PerfectHarness) {
        let mut events = EventSet::new();
        events.add(Zen2Event::DeDisDispatchTokenStalls1(
            DeDisDispatchTokenStalls1Mask::LoadQueueRsrcStall
        ));

        // The prologue is intentionally empty: no setup code is emitted
        // before the measured instructions.
        let opts = MispredictedReturnOptions::zen2_defaults()
            .prologue_fn(Some(|f, _input| {
                dynasm!(f
                );
            }))
            .rdpmc_strat(RdpmcStrategy::Gpr(Gpr::R15));

        let mut exp_results = ExperimentResults::new();
        for testcase in Self::CASES.iter() {
            println!("[*] Testcase '{}'", testcase.desc);
            let mut case_res = ExperimentCaseResults::new(testcase.desc);

            for input in 0..=128 {
                let asm = Self::emit(opts, input, testcase.func);
                let asm_reader = asm.reader();
                let asm_tgt_buf = asm_reader.lock();
                let asm_tgt_ptr = asm_tgt_buf.ptr(AssemblyOffset(0));
                // SAFETY: `asm_tgt_ptr` is the address of offset 0 in the
                // buffer produced by `Self::emit`, and `asm_tgt_buf` (the
                // lock on that buffer) stays alive until the end of this
                // iteration, after all measurements have finished.
                // NOTE(review): this assumes the emitted code is a complete
                // function matching `MeasuredFn`'s ABI - confirm against the
                // template that provides `emit`.
                let asm_fn: MeasuredFn = unsafe {
                    std::mem::transmute(asm_tgt_ptr)
                };

                for event in events.iter() {
                    let desc = event.as_desc();
                    let results = harness.measure(asm_fn,
                        desc.id(), desc.mask(), 256, InputMethod::Fixed(0, 0)
                    ).unwrap();
                    case_res.record(*event, input, results);
                }
            }
            // `case_res` is not used again, so move it instead of cloning.
            exp_results.push(case_res);
        }

        for case_results in exp_results.data.iter() {
            for (event, event_results) in case_results.data.iter() {
                let edesc = event.as_desc();
                let minmax = event_results.local_minmax();

                // Find the first test where the minimum observed number of
                // events is non-zero. `None` means no test ever produced a
                // nonzero minimum; report that explicitly rather than as
                // index 0, which would look like a stall on the first test.
                let limit = minmax.iter().position(|x| x.0 > 0);
                match limit {
                    Some(idx) => println!("{:03x}:{:02x}, limit={:4} ({})",
                        edesc.id(), edesc.mask(), idx, case_results.desc
                    ),
                    None => println!("{:03x}:{:02x}, limit=none ({})",
                        edesc.id(), edesc.mask(), case_results.desc
                    ),
                }
            }
        }
    }
}