Skip to content

Commit

Permalink
CP-49078: Preprocess fields into a Hashtbl within get_record (#6114)
Browse files Browse the repository at this point in the history
Flame graphs indicate that, under load created by parallel "xe vm-list"
commands, the DB action get_record is hit often. This function
constructs an API-level record by marshalling an association list that
maps field names to unmarshalled string values. To do this, it serially
queries all the field names using `List.assoc`. This has rather large
cost in doing lexicographical string comparisons (`caml_compare` on
string keys).

To avoid this, regardless of record size, we preprocess the association
lists `__regular_fields` and `__set_refs` into a (string, string)
Hashtbl.t and query that to construct each record field.

---

This benefit of this change is most notable for large records (such as
`VM` and `pool`). The cost of the previously generated code, which does
a bunch of serial `List.assoc` calls, incurs the quadratic cost of list
traversal (compounded by the costly lexicographical comparison of
strings during the search).

To produce measurements, I sampled xapi under a load of 500 consecutive
`xe vm-list` invocations (using the same sampling rate with `perf`) on a
host with a single VM.

Without the change, the `get_record` done internally by `xe vm-list`
makes up for ~33.59% of the samples (33.59% = 1,592,782,255 samples).
With the change, `get_record` accounts for ~7.56 of the samples (of
which there are substantially fewer collected: 7.56% = 264,948,239). So
the number of samples for `get_record` has dropped from 1,592,782,255 to
264,948,239 (assuming `perf`'s sampling is reliable).

You can see the visual difference in the flame graphs:

Before:

![{B1FEFF3A-AD91-478B-A828-89DCD19C2BEA}](https://github.com/user-attachments/assets/b7a4d504-3894-4f34-8dbe-b63de0e1d88c)

After:

![{CB28FFEC-944D-4F26-918A-FFB57E4875A3}](https://github.com/user-attachments/assets/fa517307-deb6-4a49-9a18-8217deb38734)

Of course, this is benefit as measured in aggregate (`perf` sampling),
so quite a fast and loose comparison. In practice, the `xe vm-list`
stress test goes from 7.4s to 6.2s (as `get_record` makes up only a
small part of the work done for a single `xe vm-list`).
  • Loading branch information
psafont authored Nov 18, 2024
2 parents ddfea5b + 3a49e86 commit 1da872b
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 64 deletions.
1 change: 1 addition & 0 deletions ocaml/database/db_cache_types.ml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ module Row = struct
with Not_found -> raise (DBCache_NotFound ("missing field", key, ""))

let add_defaults g (schema : Schema.Table.t) t =
let schema = Schema.Table.t'_of_t schema in
List.fold_left
(fun t c ->
if not (mem c.Schema.Column.name t) then
Expand Down
99 changes: 88 additions & 11 deletions ocaml/database/schema.ml
Original file line number Diff line number Diff line change
Expand Up @@ -96,28 +96,104 @@ module Column = struct
(** only so we can special case set refs in the interface *)
}
[@@deriving sexp]

let name_of t = t.name
end

let tabulate ks ~key_fn =
let tbl = Hashtbl.create 64 in
List.iter (fun c -> Hashtbl.replace tbl (key_fn c) c) ks ;
tbl

let values_of_table tbl = Hashtbl.fold (fun _ v vs -> v :: vs) tbl []

module Table = struct
type t = {name: string; columns: Column.t list; persistent: bool}
type t' = {name: string; columns: Column.t list; persistent: bool}
[@@deriving sexp]

let find name t =
try List.find (fun col -> col.Column.name = name) t.columns
with Not_found ->
raise (Db_exn.DBCache_NotFound ("missing column", t.name, name))
type t = {
name: string
; columns: (string, Column.t) Hashtbl.t
; persistent: bool
}

let t'_of_t : t -> t' =
fun (t : t) ->
let ({name; columns; persistent} : t) = t in
let columns = values_of_table columns in
{name; columns; persistent}

let t_of_t' : t' -> t =
fun (t' : t') ->
let ({name; columns; persistent} : t') = t' in
let columns = tabulate columns ~key_fn:Column.name_of in
{name; columns; persistent}

let sexp_of_t t =
let t' = t'_of_t t in
sexp_of_t' t'

let t_of_sexp s =
let ({name; columns; persistent} : t') = t'_of_sexp s in
let columns = tabulate columns ~key_fn:Column.name_of in
({name; columns; persistent} : t)

let find name (t : t) =
match Hashtbl.find_opt t.columns name with
| Some c ->
c
| _ ->
raise (Db_exn.DBCache_NotFound ("missing column", t.name, name))

let create ~name ~columns ~persistent : t =
let columns =
let tbl = Hashtbl.create 64 in
List.iter (fun c -> Hashtbl.add tbl c.Column.name c) columns ;
tbl
in
{name; columns; persistent}

let name_of t = t.name
end

type relationship = OneToMany of string * string * string * string
[@@deriving sexp]

module Database = struct
type t = {tables: Table.t list} [@@deriving sexp]
type t' = {tables: Table.t list} [@@deriving sexp]

type t = {tables: (string, Table.t) Hashtbl.t}

let t_of_t' : t' -> t =
fun (t' : t') ->
let ({tables} : t') = t' in
let tables = tabulate tables ~key_fn:Table.name_of in
{tables}

let t'_of_t : t -> t' =
fun (t : t) ->
let ({tables} : t) = t in
let tables = values_of_table tables in
{tables}

let sexp_of_t t =
let t' = t'_of_t t in
sexp_of_t' t'

let t_of_sexp s =
let t' = t'_of_sexp s in
t_of_t' t'

let find name t =
try List.find (fun tbl -> tbl.Table.name = name) t.tables
with Not_found ->
raise (Db_exn.DBCache_NotFound ("missing table", name, ""))
match Hashtbl.find_opt t.tables name with
| Some tbl ->
tbl
| _ ->
raise (Db_exn.DBCache_NotFound ("missing table", name, ""))

let of_tables tables =
let tables = tabulate tables ~key_fn:Table.name_of in
{tables}
end

(** indexed by table name, a list of (this field, foreign table, foreign field) *)
Expand Down Expand Up @@ -161,7 +237,7 @@ let empty =
{
major_vsn= 0
; minor_vsn= 0
; database= {Database.tables= []}
; database= {Database.tables= Hashtbl.create 64}
; one_to_many= ForeignMap.empty
; many_to_many= ForeignMap.empty
}
Expand All @@ -174,7 +250,8 @@ let is_field_persistent schema tblname fldname =
tbl.Table.persistent && col.Column.persistent

let table_names schema =
List.map (fun t -> t.Table.name) (database schema).Database.tables
let tables = (database schema).Database.tables in
Hashtbl.fold (fun k _ ks -> k :: ks) tables []

let one_to_many tblname schema =
(* If there is no entry in the map it means that the table has no one-to-many relationships *)
Expand Down
49 changes: 33 additions & 16 deletions ocaml/database/test_schemas.ml
Original file line number Diff line number Diff line change
Expand Up @@ -99,22 +99,35 @@ let schema =
; issetref= false
}
in
let vm_table =
{
Schema.Table.name= "VM"
; columns=
[_ref; uuid; name_label; vbds; pp; name_description; tags; other_config]
; persistent= true
}
let vm_table : Schema.Table.t =
Schema.Table.t_of_t'
{
Schema.Table.name= "VM"
; columns=
[
_ref
; uuid
; name_label
; vbds
; pp
; name_description
; tags
; other_config
]
; persistent= true
}
in
let vbd_table =
{
Schema.Table.name= "VBD"
; columns= [_ref; uuid; vm; type']
; persistent= true
}
Schema.Table.t_of_t'
{
Schema.Table.name= "VBD"
; columns= [_ref; uuid; vm; type']
; persistent= true
}
in
let database =
Schema.Database.t_of_t' {Schema.Database.tables= [vm_table; vbd_table]}
in
let database = {Schema.Database.tables= [vm_table; vbd_table]} in
let one_to_many =
Schema.ForeignMap.add "VBD" [("VM", "VM", "VBDs")] Schema.ForeignMap.empty
in
Expand All @@ -140,12 +153,16 @@ let many_to_many =
in
let foo_column = {bar_column with Schema.Column.name= "foos"} in
let foo_table =
{Schema.Table.name= "foo"; columns= [bar_column]; persistent= true}
Schema.Table.t_of_t'
{Schema.Table.name= "foo"; columns= [bar_column]; persistent= true}
in
let bar_table =
{Schema.Table.name= "bar"; columns= [foo_column]; persistent= true}
Schema.Table.t_of_t'
{Schema.Table.name= "bar"; columns= [foo_column]; persistent= true}
in
let database =
Schema.Database.t_of_t' {Schema.Database.tables= [foo_table; bar_table]}
in
let database = {Schema.Database.tables= [foo_table; bar_table]} in
let many_to_many =
Schema.ForeignMap.add "foo"
[("bars", "bar", "foos")]
Expand Down
2 changes: 1 addition & 1 deletion ocaml/idl/datamodel_lifecycle.ml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ let prototyped_of_field = function
| "VTPM", "persistence_backend" ->
Some "22.26.0"
| "SM", "host_pending_features" ->
Some "24.36.0-next"
Some "24.37.0"
| "host", "last_update_hash" ->
Some "24.10.0"
| "host", "pending_guidances_full" ->
Expand Down
21 changes: 12 additions & 9 deletions ocaml/idl/datamodel_schema.ml
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,16 @@ let of_datamodel () =
in

let table obj =
{
Table.name= Escaping.escape_obj obj.Datamodel_types.name
; columns=
_ref
:: List.map (column obj) (flatten_fields obj.Datamodel_types.contents [])
; persistent=
obj.Datamodel_types.persist = Datamodel_types.PersistEverything
}
Table.t_of_t'
{
Table.name= Escaping.escape_obj obj.Datamodel_types.name
; columns=
_ref
:: List.map (column obj)
(flatten_fields obj.Datamodel_types.contents [])
; persistent=
obj.Datamodel_types.persist = Datamodel_types.PersistEverything
}
in
let is_one_to_many x =
match Datamodel_utils.Relations.classify Datamodel.all_api x with
Expand All @@ -119,7 +121,8 @@ let of_datamodel () =
in

let database api =
{Database.tables= List.map table (Dm_api.objects_of_api api)}
let tables = List.map table (Dm_api.objects_of_api api) in
Database.of_tables tables
in
{
major_vsn= Datamodel_common.schema_major_vsn
Expand Down
31 changes: 17 additions & 14 deletions ocaml/idl/ocaml_backend/gen_api.ml
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,8 @@ let gen_record_type ~with_module highapi tys =
[
sprintf "let rpc_of_%s_t x = Rpc.Dict (unbox_list [ %s ])"
obj_name (map_fields make_of_field)
; sprintf "let %s_t_of_rpc x = on_dict (fun x -> { %s }) x" obj_name
(map_fields make_to_field)
; sprintf "let %s_t_of_rpc x = on_dict (fun x assocer -> { %s }) x"
obj_name (map_fields make_to_field)
; sprintf
"type ref_%s_to_%s_t_map = (ref_%s * %s_t) list [@@deriving \
rpc]"
Expand Down Expand Up @@ -408,10 +408,6 @@ let gen_client_types highapi =
x | _ -> failwith \"Date.t_of_rpc\""
; "end"
]
; [
"let on_dict f = function | Rpc.Dict x -> f x | _ -> failwith \
\"Expected Dictionary\""
]
; ["let opt_map f = function | None -> None | Some x -> Some (f x)"]
; [
"let unbox_list = let rec loop aux = function"
Expand All @@ -421,14 +417,21 @@ let gen_client_types highapi =
; "loop []"
]
; [
"let assocer key map default = "
; " try"
; " List.assoc key map"
; " with Not_found ->"
; " match default with"
; " | Some d -> d"
; " | None -> failwith (Printf.sprintf \"Field %s not present in \
rpc\" key)"
"let assocer kvs ="
; "let tbl = Hashtbl.create 256 in"
; "List.iter (fun (k, v) -> Hashtbl.replace tbl k v) kvs;"
; "fun key _ default ->"
; "match Hashtbl.find_opt tbl key with"
; "| Some v -> v"
; "| _ ->"
; " match default with"
; " | Some d -> d"
; " | _ -> failwith (Printf.sprintf \"Field %s not present in rpc\" \
key)"
]
; [
"let on_dict f = function | Rpc.Dict x -> f x (assocer x) | _ -> \
failwith \"Expected Dictionary\""
]
; gen_non_record_type all_types
; gen_record_type ~with_module:true highapi
Expand Down
62 changes: 49 additions & 13 deletions ocaml/idl/ocaml_backend/gen_db_actions.ml
Original file line number Diff line number Diff line change
Expand Up @@ -298,35 +298,71 @@ let db_action api : O.Module.t =
~body:(List.concat [open_db_module; body])
()
in
let contains_setrefs fields =
let is_referential_field = function
| {DT.ty= DT.Set (DT.Ref _); field_ignore_foreign_key= false; _} ->
true
| _ ->
false
in
List.exists is_referential_field fields
in
let get_record_aux_fn_body ?(m = "API.") (obj : obj) (all_fields : field list)
=
let of_field = function
| {
DT.ty= DT.Set (DT.Ref other)
DT.ty= DT.Set (DT.Ref _ as ty)
; full_name
; DT.field_ignore_foreign_key= false
; _
} ->
Printf.sprintf "List.map %s.%s (List.assoc \"%s\" __set_refs)"
_string_to_dm
(OU.alias_of_ty (DT.Ref other))
let accessor = "find_setref" in
Printf.sprintf "List.map %s.%s (%s \"%s\")" _string_to_dm
(OU.alias_of_ty ty) accessor
(Escaping.escape_id full_name)
| f ->
_string_to_dm
^ "."
^ OU.alias_of_ty f.DT.ty
^ "(List.assoc \""
^ Escaping.escape_id f.full_name
^ "\" __regular_fields)"
let ty_alias = OU.alias_of_ty f.DT.ty in
let accessor = "find_regular" in
let field_name = Escaping.escape_id f.full_name in
Printf.sprintf {|%s.%s (%s "%s")|} _string_to_dm ty_alias accessor
field_name
in
let make_field f =
Printf.sprintf " %s%s = %s;" m
(OU.ocaml_of_record_field (obj.DT.name :: f.DT.full_name))
(of_field f)
in

let create_lookup_fn name initial_size kvs =
let indent = " " in
[
Printf.sprintf "let %s =" name
; " let module HT = Hashtbl in"
; Printf.sprintf " let tbl = HT.create %d in" initial_size
; Printf.sprintf " List.iter (fun (k, v) -> HT.replace tbl k v) %s;" kvs
; " HT.find tbl"
; "in"
]
|> List.map (( ^ ) indent)
in
let populate_regulars_tbl =
create_lookup_fn "find_regular" 256 "__regular_fields"
in
let populate_setrefs_tbl =
if contains_setrefs all_fields then
create_lookup_fn "find_setref" 32 "__set_refs"
else
[]
in
let fields = List.map make_field all_fields in
let mk_rec = ["{"] @ fields @ [" }"] in
String.concat "\n" mk_rec
let mk_rec = [" {"] @ fields @ [" }"] in
let body =
"\n"
^ (populate_regulars_tbl @ populate_setrefs_tbl @ mk_rec
|> String.concat "\n"
)
in
body
in
let get_record_aux_fn (obj : obj) =
let record_fields = List.filter client_side_field (DU.fields_of_obj obj) in
Expand Down Expand Up @@ -364,7 +400,7 @@ let db_action api : O.Module.t =
expr
; Printf.sprintf
"List.map (fun (ref,(__regular_fields,__set_refs)) -> \
Ref.of_%sstring ref, %s __regular_fields __set_refs) records"
Ref.of_%sstring ref, %s ~__regular_fields ~__set_refs) records"
(if obj.DT.name = "session" then "secret_" else "")
conversion_fn
]
Expand Down

0 comments on commit 1da872b

Please sign in to comment.