// stq.rs
use perfect::*;
use perfect::events::*;
use perfect::stats::*;
use rand::prelude::*;
use perfect::asm::Emitter;
/// Entry point: build a harness from the default Zen 2 configuration and run
/// the store queue capacity experiment on it.
fn main() {
    let config = HarnessConfig::default_zen2();
    let mut zen2_harness = config.emit();
    StoreQueueCapacity::run(&mut zen2_harness);
}
/// Create store queue pressure.
///
/// Explanation
/// ===========
///
/// The store queue keeps track of the addresses/values of recent stores.
/// The Family 17h SOG (Software Optimization Guide) mentions that the store
/// queue capacity is 48 entries.
///
/// Test
/// ====
///
/// Execute many back-to-back stores and measure stall cycles
/// (with 'StoreQueueRsrcStall'). When we exceed the store queue capacity,
/// the number of stall cycles will be nonzero.
///
/// Note that the active test cases all store to the same fixed address
/// (`0x1000`); only the older, commented-out implementation in this file
/// used a different (shuffled) address for each store.
///
/// Each test case emits `input + 1` copies of its instruction, because the
/// emitters loop over the inclusive range `0..=input`.
///
/// Results
/// =======
///
/// Stall cycles observed when we perform more than 48 stores.
///
pub struct StoreQueueCapacity;
// Empty impl: only the trait's default items are used. `Self::emit`, called
// from `run`, is presumably one of them - confirm against the trait definition.
impl MispredictedReturnTemplate<usize> for StoreQueueCapacity {}
impl StoreQueueCapacity {
    /// Test cases for the experiment.
    ///
    /// Each entry pairs a human-readable description with an emitter that
    /// writes `input + 1` copies of one instruction (the range `0..=input`
    /// is inclusive). Every store/`movnti` case targets the fixed address
    /// `0x1000` and uses dynamic register number 0 as the source operand;
    /// the last case emits `sfence` instead of a store.
    const CASES: StaticEmitterCases<usize> = StaticEmitterCases::new(&[
        EmitterDesc {
            desc: "mov [imm], r64",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; mov [0x1000], Rq(0)); }
            },
        },
        EmitterDesc {
            desc: "mov [imm], r32",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; mov [0x1000], Rd(0)); }
            },
        },
        EmitterDesc {
            desc: "mov [imm], r16",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; mov [0x1000], Rw(0)); }
            },
        },
        EmitterDesc {
            desc: "movnti [imm], r64",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; movnti [0x1000], Rq(0)); }
            },
        },
        EmitterDesc {
            desc: "movnti [imm], r32",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; movnti [0x1000], Rd(0)); }
            },
        },
        EmitterDesc {
            desc: "sfence",
            func: |f, input| {
                for _ in 0..=input { dynasm!(f ; sfence); }
            },
        },
    ]);

    // Earlier hand-written version of this experiment (one shuffled,
    // 64-byte-strided address per store), kept commented out for reference.
    //
    //pub fn emit(num_stores: usize) -> X64Assembler {
    // let mut rng = rand::thread_rng();
    // let mut f = X64Assembler::new().unwrap();
    // // Generate random addresses for the stores
    // let mut addrs: Vec<i32> = (0x0001_0000..=0x0001_1000)
    // .step_by(64).collect();
    // assert!(num_stores < addrs.len());
    // addrs.shuffle(&mut rng);
    // dynasm!(f
    // ; mov rax, 0x1111_dead_1111_dead
    // ; vmovq xmm0, rax
    // ; vpbroadcastq ymm0, xmm0
    // ; lfence
    // ; .align 4096
    // ; lfence
    // );
    // f.emit_rdpmc_start(0, Gpr::R15 as u8);
    // // Insert stores.
    // // The width of the store doesn't seem to matter.
    // for addr in &addrs[0..=num_stores] {
    // dynasm!(f ; mov [*addr], rax ); // 8B
    // //dynasm!(f ; mov [*addr], eax ); // 4B
    // //dynasm!(f ; mov [*addr], ax ); // 2B
    // //dynasm!(f ; mov [*addr], al ); // 1B
    // //dynasm!(f ; movnti [*addr], rax );
    // //dynasm!(f ; vmovd [*addr], xmm0); // 4B
    // //dynasm!(f ; vmovq [*addr], xmm0); // 8B
    // //dynasm!(f ; vmovdqa [*addr], xmm0); // 16B
    // //dynasm!(f ; vmovdqa [*addr], ymm0); // 32B
    // }
    // //dynasm!(f ; sfence);
    // f.emit_rdpmc_end(0, Gpr::R15 as u8, Gpr::Rax as u8);
    // f.emit_ret();
    // f.commit().unwrap();
    // f
    //}
    //pub fn run(harness: &mut PerfectHarness) {
    // let mut events = EventSet::new();
    // events.add(Zen2Event::LsDispatch(LsDispatchMask::StDispatch));
    // events.add(Zen2Event::LsNotHaltedCyc(0x00));
    // events.add(Zen2Event::DeDisDispatchTokenStalls1(
    // DeDisDispatchTokenStalls1Mask::StoreQueueRsrcStall
    // ));
    // for num_stores in 0..=49 {
    // let asm = Self::emit(num_stores);
    // let asm_reader = asm.reader();
    // let asm_tgt_buf = asm_reader.lock();
    // let asm_tgt_ptr = asm_tgt_buf.ptr(AssemblyOffset(0));
    // let asm_fn: MeasuredFn = unsafe {
    // std::mem::transmute(asm_tgt_ptr)
    // };
    // println!("[*] num_stores={}", num_stores);
    // for event in events.iter() {
    // let desc = event.as_desc();
    // let results = harness.measure(asm_fn,
    // desc.id(), desc.mask(), 16, InputMethod::Fixed(0, 0),
    // ).unwrap();
    // let dist = results.get_distribution();
    // let min = results.get_min();
    // let max = results.get_max();
    // println!(" {:03x}:{:02x} {:032} min={:3} max={:3} dist={:?}",
    // desc.id(), desc.mask(), desc.name(), min, max, dist);
    // }
    // }
    //}

    /// Run every case in [`Self::CASES`] for inputs `0..=128` and report, per
    /// case and event, the first input at which the event count is non-zero.
    ///
    /// For each (case, input) pair the code is emitted with `Self::emit`,
    /// then measured with `harness.measure` (passing `256` and
    /// `InputMethod::Fixed(0, 0)`) for each event in the set; currently only
    /// `StoreQueueRsrcStall` is measured. After all cases have been measured,
    /// one summary line per case/event is printed.
    ///
    /// The printed `limit` is the index of the first entry returned by
    /// `local_minmax()` whose first field is greater than zero (presumably
    /// one `(min, max)` pair per input, in input order - confirm against
    /// `local_minmax`). If no entry qualifies, `limit=none` is printed rather
    /// than a `0` that would be indistinguishable from a stall at input 0.
    ///
    /// # Panics
    ///
    /// Panics if `harness.measure` returns an error.
    fn run(harness: &mut PerfectHarness) {
        let mut events = EventSet::new();
        events.add(Zen2Event::DeDisDispatchTokenStalls1(
            DeDisDispatchTokenStalls1Mask::StoreQueueRsrcStall
        ));

        // The prologue is intentionally empty: no setup code is emitted.
        let opts = MispredictedReturnOptions::zen2_defaults()
            .prologue_fn(Some(|f, _input| {
                dynasm!(f
                );
            }))
            .rdpmc_strat(RdpmcStrategy::Gpr(Gpr::R15));

        let mut exp_results = ExperimentResults::new();
        for testcase in Self::CASES.iter() {
            println!("[*] Testcase '{}'", testcase.desc);
            let mut case_res = ExperimentCaseResults::new(testcase.desc);

            for input in 0..=128 {
                let asm = Self::emit(opts, input, testcase.func);
                let asm_reader = asm.reader();
                let asm_tgt_buf = asm_reader.lock();
                let asm_tgt_ptr = asm_tgt_buf.ptr(AssemblyOffset(0));
                // SAFETY: `asm_tgt_ptr` is the address of offset 0 in the
                // buffer produced by `Self::emit`, and `asm_tgt_buf` is held
                // until the end of this loop body, so the code stays mapped
                // while `asm_fn` is in use. This assumes the emitted code
                // follows the `MeasuredFn` calling convention - TODO confirm
                // against the emitter.
                let asm_fn: MeasuredFn = unsafe {
                    std::mem::transmute(asm_tgt_ptr)
                };

                for event in events.iter() {
                    let desc = event.as_desc();
                    let results = harness.measure(asm_fn,
                        desc.id(), desc.mask(), 256, InputMethod::Fixed(0, 0)
                    ).unwrap();
                    case_res.record(*event, input, results);
                }
            }
            // `case_res` is not used again, so move it instead of cloning.
            exp_results.push(case_res);
        }

        for case_results in exp_results.data.iter() {
            for (event, event_results) in case_results.data.iter() {
                let edesc = event.as_desc();
                let minmax = event_results.local_minmax();

                // Find the first test where the minimum observed number of
                // events is non-zero.
                let limit = match minmax.iter().position(|x| x.0 > 0) {
                    Some(idx) => idx.to_string(),
                    None => "none".to_string(),
                };
                // `{:>4}` keeps the right-aligned, 4-wide column that the
                // numeric `{:4}` format produced.
                println!("{:03x}:{:02x}, limit={:>4} ({})",
                    edesc.id(), edesc.mask(), limit, case_results.desc
                );
            }
        }
    }
}