forked from ocaml/ocaml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
spacetime_profiling.ml
479 lines (451 loc) · 18.4 KB
/
spacetime_profiling.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Mark Shinwell and Leo White, Jane Street Europe *)
(* *)
(* Copyright 2015--2018 Jane Street Group LLC *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
[@@@ocaml.warning "+a-4-30-40-41-42"]
module V = Backend_var
module VP = Backend_var.With_provenance
let node_num_header_words = 2 (* [Node_num_header_words] in the runtime. *)
let index_within_node = ref node_num_header_words
(* The [lazy]s are to ensure that we don't create [V.t]s at toplevel
when not using Spacetime profiling. (This could cause stamps to differ
between bytecode and native .cmis when no .mli is present, e.g.
arch.ml.) *)
let spacetime_node = ref (lazy (Cmm.Cvar (V.create_local "dummy")))
let spacetime_node_ident = ref (lazy (V.create_local "dummy"))
let current_function_label = ref None
let direct_tail_call_point_indexes = ref []
let reverse_shape = ref ([] : Mach.spacetime_shape)
(* CR-someday mshinwell: This code could be updated to use [placeholder_dbg] as
in [Cmmgen]. *)
let cconst_int i = Cmm.Cconst_int (i, Debuginfo.none)
let cconst_natint i = Cmm.Cconst_natint (i, Debuginfo.none)
let cconst_symbol s = Cmm.Cconst_symbol (s, Debuginfo.none)
let something_was_instrumented () =
!index_within_node > node_num_header_words
let next_index_within_node ~part_of_shape ~label =
let index = !index_within_node in
begin match part_of_shape with
| Mach.Direct_call_point _ ->
incr index_within_node;
if Config.enable_call_counts then begin
incr index_within_node
end
| Mach.Indirect_call_point ->
incr index_within_node
| Mach.Allocation_point ->
incr index_within_node;
incr index_within_node;
incr index_within_node
end;
reverse_shape := (part_of_shape, label) :: !reverse_shape;
index
let reset ~spacetime_node_ident:ident ~function_label =
index_within_node := node_num_header_words;
spacetime_node := lazy (Cmm.Cvar ident);
spacetime_node_ident := lazy ident;
direct_tail_call_point_indexes := [];
current_function_label := Some function_label;
reverse_shape := []
let code_for_function_prologue ~function_name ~fun_dbg:dbg ~node_hole =
let node = V.create_local "node" in
let new_node = V.create_local "new_node" in
let must_allocate_node = V.create_local "must_allocate_node" in
let is_new_node = V.create_local "is_new_node" in
let no_tail_calls = List.length !direct_tail_call_point_indexes < 1 in
let open Cmm in
let initialize_direct_tail_call_points_and_return_node =
let new_node_encoded = V.create_local "new_node_encoded" in
(* The callee node pointers within direct tail call points must initially
point back at the start of the current node and be marked as per
[Encode_tail_caller_node] in the runtime. *)
let indexes = !direct_tail_call_point_indexes in
let body =
List.fold_left (fun init_code index ->
(* Cf. [Direct_callee_node] in the runtime. *)
let offset_in_bytes = index * Arch.size_addr in
Csequence (
Cop (Cstore (Word_int, Lambda.Assignment),
[Cop (Caddi, [Cvar new_node; cconst_int offset_in_bytes], dbg);
Cvar new_node_encoded], dbg),
init_code))
(Cvar new_node)
indexes
in
match indexes with
| [] -> body
| _ ->
Clet (VP.create new_node_encoded,
(* Cf. [Encode_tail_caller_node] in the runtime. *)
Cop (Cor, [Cvar new_node; cconst_int 1], dbg),
body)
in
let pc = V.create_local "pc" in
Clet (VP.create node,
Cop (Cload (Word_int, Asttypes.Mutable), [Cvar node_hole], dbg),
Clet (VP.create must_allocate_node,
Cop (Cand, [Cvar node; cconst_int 1], dbg),
Cifthenelse (
Cop (Ccmpi Cne, [Cvar must_allocate_node; cconst_int 1], dbg),
dbg,
Cvar node,
dbg,
Clet (VP.create is_new_node,
Clet (VP.create pc, cconst_symbol function_name,
Cop (Cextcall ("caml_spacetime_allocate_node",
[| Int |], false, None),
[cconst_int (1 (* header *) + !index_within_node);
Cvar pc;
Cvar node_hole;
],
dbg)),
Clet (VP.create new_node,
Cop (Cload (Word_int, Asttypes.Mutable), [Cvar node_hole], dbg),
if no_tail_calls then Cvar new_node
else
Cifthenelse (
Cop (Ccmpi Ceq, [Cvar is_new_node; cconst_int 0], dbg),
dbg,
Cvar new_node,
dbg,
initialize_direct_tail_call_points_and_return_node,
dbg))),
dbg)))
let code_for_blockheader ~value's_header ~node ~dbg =
let num_words = Nativeint.shift_right_logical value's_header 10 in
let existing_profinfo = V.create_local "existing_profinfo" in
let existing_count = V.create_local "existing_count" in
let profinfo = V.create_local "profinfo" in
let address_of_profinfo = V.create_local "address_of_profinfo" in
let label = Cmm.new_label () in
let index_within_node =
next_index_within_node ~part_of_shape:Mach.Allocation_point ~label
in
let offset_into_node = Arch.size_addr * index_within_node in
let open Cmm in
let generate_new_profinfo =
(* This will generate a static branch to a function that should usually
be in the cache, which hopefully gives a good code size/performance
balance.
The "Some label" is important: it provides the link between the shape
table, the allocation point, and the frame descriptor table---enabling
the latter table to be used for resolving a program counter at such
a point to a location.
*)
Cop (Cextcall ("caml_spacetime_generate_profinfo", [| Int |],
false, Some label),
[Cvar address_of_profinfo;
cconst_int (index_within_node + 1)],
dbg)
in
(* Check if we have already allocated a profinfo value for this allocation
point with the current backtrace. If so, use that value; if not,
allocate a new one. *)
Clet (VP.create address_of_profinfo,
Cop (Caddi, [
Cvar node;
cconst_int offset_into_node;
], dbg),
Clet (VP.create existing_profinfo,
Cop (Cload (Word_int, Asttypes.Mutable), [Cvar address_of_profinfo],
dbg),
Clet (VP.create profinfo,
Cifthenelse (
Cop (Ccmpi Cne, [Cvar existing_profinfo; cconst_int 1 (* () *)], dbg),
dbg,
Cvar existing_profinfo,
dbg,
generate_new_profinfo,
dbg),
Clet (VP.create existing_count,
Cop (Cload (Word_int, Asttypes.Mutable), [
Cop (Caddi,
[Cvar address_of_profinfo; cconst_int Arch.size_addr], dbg)
], dbg),
Csequence (
Cop (Cstore (Word_int, Lambda.Assignment),
[Cop (Caddi,
[Cvar address_of_profinfo; cconst_int Arch.size_addr], dbg);
Cop (Caddi, [
Cvar existing_count;
(* N.B. "*2" since the count is an OCaml integer.
The "1 +" is to count the value's header. *)
cconst_int (2 * (1 + Nativeint.to_int num_words));
], dbg);
], dbg),
(* [profinfo] looks like a black [Infix_tag] header. Instead of
having to mask [profinfo] before ORing it with the desired
header, we can use an XOR trick, to keep code size down. *)
let value's_header =
Nativeint.logxor value's_header
(Nativeint.logor
((Nativeint.logor (Nativeint.of_int Obj.infix_tag)
(Nativeint.shift_left 3n (* <- Caml_black *) 8)))
(Nativeint.shift_left
(* The following is the [Infix_offset_val], in words. *)
(Nativeint.of_int (index_within_node + 1)) 10))
in
Cop (Cxor, [Cvar profinfo; cconst_natint value's_header], dbg))))))
type callee =
| Direct of string
| Indirect of Cmm.expression
let code_for_call ~node ~callee ~is_tail ~label dbg =
(* We treat self recursive calls as tail calls to avoid blow-ups in the
graph. *)
let is_self_recursive_call =
match callee with
| Direct callee ->
begin match !current_function_label with
| None -> Misc.fatal_error "[current_function_label] not set"
| Some label -> String.equal callee label
end
| Indirect _ -> false
in
let is_tail = is_tail || is_self_recursive_call in
let index_within_node =
match callee with
| Direct callee ->
next_index_within_node
~part_of_shape:(Mach.Direct_call_point { callee; })
~label
| Indirect _ ->
next_index_within_node ~part_of_shape:Mach.Indirect_call_point ~label
in
begin match callee with
(* If this is a direct tail call point, we need to note down its index,
so the correct initialization code can be emitted in the prologue. *)
| Direct _ when is_tail ->
direct_tail_call_point_indexes :=
index_within_node::!direct_tail_call_point_indexes
| Direct _ | Indirect _ -> ()
end;
let place_within_node = V.create_local "place_within_node" in
let open Cmm in
Clet (VP.create place_within_node,
Cop (Caddi, [node; cconst_int (index_within_node * Arch.size_addr)], dbg),
(* The following code returns the address that is to be moved into the
(hard) node hole pointer register immediately before the call.
(That move is inserted in [Selectgen].) *)
match callee with
| Direct _callee ->
if Config.enable_call_counts then begin
let count_addr = V.create_local "call_count_addr" in
let count = V.create_local "call_count" in
Clet (VP.create count_addr,
Cop (Caddi, [Cvar place_within_node; cconst_int Arch.size_addr], dbg),
Clet (VP.create count,
Cop (Cload (Word_int, Asttypes.Mutable), [Cvar count_addr], dbg),
Csequence (
Cop (Cstore (Word_int, Lambda.Assignment),
(* Adding 2 really means adding 1; the count is encoded
as an OCaml integer. *)
[Cvar count_addr; Cop (Caddi, [Cvar count; cconst_int 2], dbg)],
dbg),
Cvar place_within_node)))
end else begin
Cvar place_within_node
end
| Indirect callee ->
let caller_node =
if is_tail then node
else cconst_int 1 (* [Val_unit] *)
in
Cop (Cextcall ("caml_spacetime_indirect_node_hole_ptr",
[| Int |], false, None),
[callee; Cvar place_within_node; caller_node],
dbg))
class virtual instruction_selection = object (self)
inherit Selectgen.selector_generic as super
(* [disable_instrumentation] ensures that we don't try to instrument the
instrumentation... *)
val mutable disable_instrumentation = false
method private instrument_direct_call ~env ~func ~is_tail ~label_after dbg =
let instrumentation =
code_for_call
~node:(Lazy.force !spacetime_node)
~callee:(Direct func)
~is_tail
~label:label_after
dbg
in
match self#emit_expr env instrumentation with
| None -> assert false
| Some reg -> Some reg
method private instrument_indirect_call ~env ~callee ~is_tail
~label_after dbg =
(* [callee] is a pseudoregister, so we have to bind it in the environment
and reference the variable to which it is bound. *)
let callee_ident = V.create_local "callee" in
let env = Selectgen.env_add (VP.create callee_ident) [| callee |] env in
let instrumentation =
code_for_call
~node:(Lazy.force !spacetime_node)
~callee:(Indirect (Cmm.Cvar callee_ident))
~is_tail
~label:label_after
dbg
in
match self#emit_expr env instrumentation with
| None -> assert false
| Some reg -> Some reg
method private can_instrument () =
Config.spacetime && not disable_instrumentation
method! about_to_emit_call env desc arg dbg =
if not (self#can_instrument ()) then None
else
let module M = Mach in
match desc with
| M.Iop (M.Icall_imm { func; label_after; }) ->
assert (Array.length arg = 0);
self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg
| M.Iop (M.Icall_ind { label_after; }) ->
assert (Array.length arg = 1);
self#instrument_indirect_call ~env ~callee:arg.(0)
~is_tail:false ~label_after dbg
| M.Iop (M.Itailcall_imm { func; label_after; }) ->
assert (Array.length arg = 0);
self#instrument_direct_call ~env ~func ~is_tail:true ~label_after dbg
| M.Iop (M.Itailcall_ind { label_after; }) ->
assert (Array.length arg = 1);
self#instrument_indirect_call ~env ~callee:arg.(0)
~is_tail:true ~label_after dbg
| M.Iop (M.Iextcall { func; alloc = true; label_after; }) ->
(* N.B. No need to instrument "noalloc" external calls. *)
assert (Array.length arg = 0);
self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg
| _ -> None
method private instrument_blockheader ~env ~value's_header ~dbg =
let instrumentation =
code_for_blockheader
~node:(Lazy.force !spacetime_node_ident)
~value's_header ~dbg
in
self#emit_expr env instrumentation
method private emit_prologue f ~node_hole ~env =
(* We don't need the prologue unless we inserted some instrumentation.
This corresponds to adding the prologue if the function contains one
or more call or allocation points. *)
if something_was_instrumented () then begin
let prologue_cmm =
code_for_function_prologue ~function_name:f.Cmm.fun_name ~node_hole
~fun_dbg:f.Cmm.fun_dbg
in
disable_instrumentation <- true;
let node_temp_reg =
match self#emit_expr env prologue_cmm with
| None ->
Misc.fatal_error "Spacetime prologue instruction \
selection did not yield a destination register"
| Some node_temp_reg -> node_temp_reg
in
disable_instrumentation <- false;
let node = Lazy.force !spacetime_node_ident in
let node_reg = Selectgen.env_find node env in
self#insert_moves env node_temp_reg node_reg
end
method! emit_blockheader env n dbg =
if self#can_instrument () then begin
disable_instrumentation <- true;
let result = self#instrument_blockheader ~env ~value's_header:n ~dbg in
disable_instrumentation <- false;
result
end else begin
super#emit_blockheader env n dbg
end
method! select_allocation bytes =
if self#can_instrument () then begin
(* Leave space for a direct call point. We cannot easily insert any
instrumentation code, so the fields are filled in instead by
[caml_spacetime_caml_garbage_collection]. *)
let label = Cmm.new_label () in
let index =
next_index_within_node
~part_of_shape:(Mach.Direct_call_point { callee = "caml_call_gc"; })
~label
in
Mach.Ialloc {
bytes;
label_after_call_gc = Some label;
spacetime_index = index;
}
end else begin
super#select_allocation bytes
end
method! select_allocation_args env =
if self#can_instrument () then begin
let regs = Selectgen.env_find (Lazy.force !spacetime_node_ident) env in
match regs with
| [| reg |] -> [| reg |]
| _ -> failwith "Expected one register only for spacetime_node_ident"
end else begin
super#select_allocation_args env
end
method! select_checkbound () =
(* This follows [select_allocation], above. *)
if self#can_instrument () then begin
let label = Cmm.new_label () in
let index =
next_index_within_node
~part_of_shape:(
Mach.Direct_call_point { callee = "caml_ml_array_bound_error"; })
~label
in
Mach.Icheckbound {
label_after_error = Some label;
spacetime_index = index;
}
end else begin
super#select_checkbound ()
end
method! select_checkbound_extra_args () =
if self#can_instrument () then begin
(* This follows [select_allocation_args], above. *)
[Cmm.Cvar (Lazy.force !spacetime_node_ident)]
end else begin
super#select_checkbound_extra_args ()
end
method! initial_env () =
let env = super#initial_env () in
if Config.spacetime then
Selectgen.env_add (VP.create (Lazy.force !spacetime_node_ident))
(self#regs_for Cmm.typ_int) env
else
env
method! emit_fundecl f =
if Config.spacetime then begin
disable_instrumentation <- false;
let node = V.create_local "spacetime_node" in
reset ~spacetime_node_ident:node ~function_label:f.Cmm.fun_name
end;
super#emit_fundecl f
method! insert_prologue f ~loc_arg ~rarg ~spacetime_node_hole ~env =
let fun_spacetime_shape =
super#insert_prologue f ~loc_arg ~rarg ~spacetime_node_hole ~env
in
(* CR-soon mshinwell: add check to make sure the node size doesn't exceed
the chunk size of the allocator *)
if not Config.spacetime then fun_spacetime_shape
else begin
let node_hole, node_hole_reg =
match spacetime_node_hole with
| None -> assert false
| Some (node_hole, reg) -> node_hole, reg
in
self#insert_moves env [| Proc.loc_spacetime_node_hole |] node_hole_reg;
self#emit_prologue f ~node_hole ~env;
match !reverse_shape with
| [] -> None
(* N.B. We do not reverse the shape list, since the function that
reconstructs it (caml_spacetime_shape_table) reverses it again. *)
| reverse_shape -> Some reverse_shape
end
end