From 67882eba91bb34d04113764eb7a1bd714f22d74f Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 26 Apr 2024 22:56:01 +0000 Subject: [PATCH 1/6] Rewrite fail function to support format and argument fail is always used in combination with Printf.sprintf so combine the two. Signed-off-by: Frediano Ziglio --- ocaml/forkexecd/test/fe_test.ml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/ocaml/forkexecd/test/fe_test.ml b/ocaml/forkexecd/test/fe_test.ml index 42991d5f16b..1268cee120f 100644 --- a/ocaml/forkexecd/test/fe_test.ml +++ b/ocaml/forkexecd/test/fe_test.ml @@ -149,17 +149,18 @@ let fail x = Printf.fprintf stderr "%s\n" x ; assert false +let fail fmt = Format.ksprintf fail fmt + let expect expected s = if s <> expected ^ "\n" then - fail (Printf.sprintf "output %s expected %s" s expected) + fail "output %s expected %s" s expected let test_exitcode () = let run_expect cmd expected = try Forkhelpers.execute_command_get_output cmd [] |> ignore with Forkhelpers.Spawn_internal_error (_, _, Unix.WEXITED n) -> if n <> expected then - fail - (Printf.sprintf "%s exited with code %d, expected %d" cmd n expected) + fail "%s exited with code %d, expected %d" cmd n expected in run_expect "/bin/false" 1 ; run_expect "/bin/xe-fe-test-no-command" 127 ; @@ -257,7 +258,7 @@ let slave = function List.iter (fun fd -> if not (List.mem fd (List.map fst filtered)) then - fail (Printf.sprintf "fd %s not in /proc/%d/fd [ %s ]" fd pid ls) + fail "fd %s not in /proc/%d/fd [ %s ]" fd pid ls ) fds ; (* Check that we have the expected number *) @@ -265,10 +266,8 @@ let slave = function Printf.fprintf stderr "%s %d\n" total_fds (List.length present - 1) *) if total_fds <> List.length filtered then - fail - (Printf.sprintf "Expected %d fds; /proc/%d/fd has %d: %s" total_fds - pid (List.length filtered) ls - ) + fail "Expected %d fds; /proc/%d/fd has %d: %s" total_fds pid + (List.length filtered) ls let sleep () = Unix.sleep 3 ; Printf.printf "Ok\n" From 4ec11700911e39dcfb464ce806d3d7c3f4a57c80 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sat, 27 Apr 2024 04:59:24 +0000 Subject: [PATCH 2/6] Use fail instead of failwith if possible More compact code. Signed-off-by: Frediano Ziglio --- ocaml/forkexecd/test/fe_test.ml | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/ocaml/forkexecd/test/fe_test.ml b/ocaml/forkexecd/test/fe_test.ml index 1268cee120f..d4db7b8a706 100644 --- a/ocaml/forkexecd/test/fe_test.ml +++ b/ocaml/forkexecd/test/fe_test.ml @@ -20,6 +20,13 @@ let min_fds = 7 let max_fds = 1024 - 13 (* fe daemon has a bunch for its own use *) +let fail x = + Xapi_stdext_unix.Unixext.write_string_to_file "/tmp/fe-test.log" x ; + Printf.fprintf stderr "%s\n" x ; + assert false + +let fail fmt = Format.ksprintf fail fmt + let all_combinations fds = let y = { @@ -117,7 +124,7 @@ let test_delay () = let timeout = 1.7 in try Forkhelpers.execute_command_get_output ~timeout exe args |> ignore ; - failwith "Failed to timeout" + fail "Failed to timeout" with | Forkhelpers.Subprocess_timeout -> let elapsed = Unix.gettimeofday () -. start in @@ -127,10 +134,7 @@ let test_delay () = if elapsed > timeout +. 0.2 then failwith "Excessive time elapsed" | e -> - failwith - (Printf.sprintf "Failed with unexpected exception: %s" - (Printexc.to_string e) - ) + fail "Failed with unexpected exception: %s" (Printexc.to_string e) let test_notimeout () = let exe = Printf.sprintf "/proc/%d/exe" (Unix.getpid ()) in @@ -138,18 +142,7 @@ let test_notimeout () = try Forkhelpers.execute_command_get_output exe args |> ignore ; () - with e -> - failwith - (Printf.sprintf "Failed with unexpected exception: %s" - (Printexc.to_string e) - ) - -let fail x = - Xapi_stdext_unix.Unixext.write_string_to_file "/tmp/fe-test.log" x ; - Printf.fprintf stderr "%s\n" x ; - assert false - -let fail fmt = Format.ksprintf fail fmt + with e -> fail "Failed with unexpected exception: %s" (Printexc.to_string e) let expect expected s = if s <> expected ^ "\n" then @@ -216,7 +209,7 @@ let master fds = let slave = function | [] -> - failwith "Error, at least one fd expected" + fail "Error, at least one fd expected" | total_fds :: rest -> let total_fds = int_of_string total_fds in let fds = From 8f7f7a074a4bf7563e49e958df39c3250c4490ae Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Wed, 24 Apr 2024 20:09:55 +0000 Subject: [PATCH 3/6] Compute exe variable just once "exe" variable is compute in mutiple functions, not very expensive to compute once. Reduce code. Signed-off-by: Frediano Ziglio --- ocaml/forkexecd/test/fe_test.ml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ocaml/forkexecd/test/fe_test.ml b/ocaml/forkexecd/test/fe_test.ml index d4db7b8a706..d5586213253 100644 --- a/ocaml/forkexecd/test/fe_test.ml +++ b/ocaml/forkexecd/test/fe_test.ml @@ -77,6 +77,8 @@ let shuffle x = let irrelevant_strings = ["irrelevant"; "not"; "important"] +let exe = Printf.sprintf "/proc/%d/exe" (Unix.getpid ()) + let one fds x = (*Printf.fprintf stderr "named_fds = %d\n" x.named_fds; Printf.fprintf stderr "extra = %d\n" x.extra;*) @@ -89,7 +91,6 @@ let one fds x = let number_of_extra = x.extra in let other_names = make_names number_of_extra in - let exe = Printf.sprintf "/proc/%d/exe" (Unix.getpid ()) in let table = (fun x -> List.combine x (List.map (fun _ -> fd) x)) (names @ other_names) in @@ -114,7 +115,6 @@ let one fds x = let test_delay () = let start = Unix.gettimeofday () in - let exe = Printf.sprintf "/proc/%d/exe" (Unix.getpid ()) in let args = ["sleep"] in (* Need to have fractional part because some internal usage split integer and fractional and do computation. @@ -137,7 +137,6 @@ let test_delay () = fail "Failed with unexpected exception: %s" (Printexc.to_string e) let test_notimeout () = - let exe = Printf.sprintf "/proc/%d/exe" (Unix.getpid ()) in let args = ["sleep"] in try Forkhelpers.execute_command_get_output exe args |> ignore ; @@ -162,7 +161,6 @@ let test_exitcode () = Printf.printf "\nCompleted exitcode tests\n" let test_output () = - let exe = Printf.sprintf "/proc/%d/exe" (Unix.getpid ()) in let expected_out = "output string" in let expected_err = "error string" in let args = ["echo"; expected_out; expected_err] in @@ -172,7 +170,6 @@ let test_output () = print_endline "Completed output tests" let test_input () = - let exe = Printf.sprintf "/proc/%d/exe" (Unix.getpid ()) in let input = "input string" in let args = ["replay"] in let out, _ = From 2eb52515c5bf6254b60f0e6f65d737c4a3fff436 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 26 Apr 2024 09:46:22 +0000 Subject: [PATCH 4/6] Fix file descriptor leak in case safe_close_and_exec fails Make sure we release the "sock" file descriptors in all cases. Add test trying to reproduce the issue passing an invalid file descriptor; not a perfect reproduction but failure in this function can happen for instance if daemon is restarted. Signed-off-by: Frediano Ziglio --- ocaml/forkexecd/lib/forkhelpers.ml | 7 +++- ocaml/forkexecd/test/fe_test.ml | 59 +++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/ocaml/forkexecd/lib/forkhelpers.ml b/ocaml/forkexecd/lib/forkhelpers.ml index 2c8041b9535..70c223a4b6d 100644 --- a/ocaml/forkexecd/lib/forkhelpers.ml +++ b/ocaml/forkexecd/lib/forkhelpers.ml @@ -195,9 +195,13 @@ let safe_close_and_exec ?env stdin stdout stderr let fds_to_close = ref [] in let add_fd_to_close_list fd = fds_to_close := fd :: !fds_to_close in - (* let remove_fd_from_close_list fd = fds_to_close := List.filter (fun fd' -> fd' <> fd) !fds_to_close in *) + let remove_fd_from_close_list fd = + fds_to_close := List.filter (fun fd' -> fd' <> fd) !fds_to_close + in let close_fds () = List.iter (fun fd -> Unix.close fd) !fds_to_close in + add_fd_to_close_list sock ; + finally (fun () -> let maybe_add_id_to_fd_map id_to_fd_map (uuid, fd, v) = @@ -285,6 +289,7 @@ let safe_close_and_exec ?env stdin stdout stderr Fecomms.write_raw_rpc sock Fe.Exec ; match Fecomms.read_raw_rpc sock with | Ok (Fe.Execed pid) -> + remove_fd_from_close_list sock ; (sock, pid) | Ok status -> let msg = diff --git a/ocaml/forkexecd/test/fe_test.ml b/ocaml/forkexecd/test/fe_test.ml index d5586213253..b430bd87c89 100644 --- a/ocaml/forkexecd/test/fe_test.ml +++ b/ocaml/forkexecd/test/fe_test.ml @@ -75,6 +75,20 @@ let shuffle x = done ; Array.to_list arr +let fd_list () = + let pid = Unix.getpid () in + let path = Printf.sprintf "/proc/%d/fd" pid in + List.filter (* get rid of the fd used to read the directory *) + (fun x -> + try + ignore (Unix.readlink (Filename.concat path x)) ; + true + with _ -> false + ) + (Array.to_list (Sys.readdir path)) + +let fd_count () = List.length (fd_list ()) + let irrelevant_strings = ["irrelevant"; "not"; "important"] let exe = Printf.sprintf "/proc/%d/exe" (Unix.getpid ()) @@ -178,6 +192,38 @@ let test_input () = expect input out ; print_endline "Completed input tests" +(* This test tests a failure inside Forkhelpers.safe_close_and_exec. + Although the exact way of this reproduction is never supposed to + happen in the real world, an internal failure could happen for instance + if forkexecd daemon is restarted for a moment, so make sure we are + able to detect and handle these cases *) +let test_internal_failure_error () = + let initial_fd_count = fd_count () in + let leak_fd_detect () = + let current_fd_count = fd_count () in + if current_fd_count <> initial_fd_count then + fail "File descriptor leak detected initially %d files, now %d" + initial_fd_count current_fd_count + in + (* this weird function will open and close "num" file descriptors + and returns the last (now closed) of them, mainly to get an invalid + file descriptor with some closed one before *) + let rec waste_fds num = + let fd = Unix.openfile "/dev/null" [Unix.O_WRONLY] 0o0 in + let ret = if num = 0 then fd else waste_fds (num - 1) in + Unix.close fd ; ret + in + let fd = waste_fds 20 in + let args = ["sleep"] in + try + Forkhelpers.safe_close_and_exec None (Some fd) None [] exe args |> ignore ; + fail "Expected an exception" + with + | Fd_send_recv.Unix_error _ -> + leak_fd_detect () + | e -> + fail "Failed with unexpected exception: %s" (Printexc.to_string e) + let master fds = Printf.printf "\nPerforming timeout tests\n%!" ; test_delay () ; @@ -187,6 +233,8 @@ let master fds = Printf.printf "\nPerforming input/output tests\n%!" ; test_output () ; test_input () ; + Printf.printf "\nPerforming internal failure test\n%!" ; + test_internal_failure_error () ; let combinations = shuffle (all_combinations fds) in Printf.printf "Starting %d tests\n%!" (List.length combinations) ; let i = ref 0 in @@ -215,16 +263,7 @@ let slave = function (* Check that these fds are present *) let pid = Unix.getpid () in let path = Printf.sprintf "/proc/%d/fd" pid in - let raw = - List.filter (* get rid of the fd used to read the directory *) - (fun x -> - try - ignore (Unix.readlink (Filename.concat path x)) ; - true - with _ -> false - ) - (Array.to_list (Sys.readdir path)) - in + let raw = fd_list () in let pairs = List.map (fun x -> (x, Unix.readlink (Filename.concat path x))) raw in From 3dcd8dfa56f25f80cc43d2441b5dfe50e8cf3aaa Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sat, 27 Apr 2024 05:22:45 +0000 Subject: [PATCH 5/6] Use /proc/self instead of /proc/%d and pid if possible We just want information about the current process. Signed-off-by: Frediano Ziglio --- ocaml/forkexecd/test/fe_test.ml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/ocaml/forkexecd/test/fe_test.ml b/ocaml/forkexecd/test/fe_test.ml index b430bd87c89..86a969ee05f 100644 --- a/ocaml/forkexecd/test/fe_test.ml +++ b/ocaml/forkexecd/test/fe_test.ml @@ -76,8 +76,7 @@ let shuffle x = Array.to_list arr let fd_list () = - let pid = Unix.getpid () in - let path = Printf.sprintf "/proc/%d/fd" pid in + let path = "/proc/self/fd" in List.filter (* get rid of the fd used to read the directory *) (fun x -> try @@ -261,8 +260,7 @@ let slave = function List.filter (fun x -> not (List.mem x irrelevant_strings)) rest in (* Check that these fds are present *) - let pid = Unix.getpid () in - let path = Printf.sprintf "/proc/%d/fd" pid in + let path = "/proc/self/fd" in let raw = fd_list () in let pairs = List.map (fun x -> (x, Unix.readlink (Filename.concat path x))) raw @@ -287,7 +285,7 @@ let slave = function List.iter (fun fd -> if not (List.mem fd (List.map fst filtered)) then - fail "fd %s not in /proc/%d/fd [ %s ]" fd pid ls + fail "fd %s not in /proc/self/fd [ %s ]" fd ls ) fds ; (* Check that we have the expected number *) @@ -295,7 +293,7 @@ let slave = function Printf.fprintf stderr "%s %d\n" total_fds (List.length present - 1) *) if total_fds <> List.length filtered then - fail "Expected %d fds; /proc/%d/fd has %d: %s" total_fds pid + fail "Expected %d fds; /proc/self/fd has %d: %s" total_fds (List.length filtered) ls let sleep () = Unix.sleep 3 ; Printf.printf "Ok\n" From 5b86ed82487e3e6b0e5519abc892d8c7927c4baa Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sun, 28 Apr 2024 10:55:40 +0000 Subject: [PATCH 6/6] Avoids calling Unix.readlink twice In case of getting the list of files we called Unix.readlink twice for every file descriptor. In case we wanted just the count we computed the list anyway. Factor out code to avoid calling function twice and do not compute the list is not needed. Signed-off-by: Frediano Ziglio --- ocaml/forkexecd/test/fe_test.ml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/ocaml/forkexecd/test/fe_test.ml b/ocaml/forkexecd/test/fe_test.ml index 86a969ee05f..b5146b9e302 100644 --- a/ocaml/forkexecd/test/fe_test.ml +++ b/ocaml/forkexecd/test/fe_test.ml @@ -75,18 +75,21 @@ let shuffle x = done ; Array.to_list arr -let fd_list () = +let fds_fold f init = let path = "/proc/self/fd" in - List.filter (* get rid of the fd used to read the directory *) - (fun x -> + (* get rid of the fd used to read the directory *) + Array.fold_right + (fun fd_num acc -> try - ignore (Unix.readlink (Filename.concat path x)) ; - true - with _ -> false + let link = Unix.readlink (Filename.concat path fd_num) in + f fd_num link acc + with _ -> acc ) - (Array.to_list (Sys.readdir path)) + (Sys.readdir path) init -let fd_count () = List.length (fd_list ()) +let fd_list () = fds_fold (fun fd_num link l -> (fd_num, link) :: l) [] + +let fd_count () = fds_fold (fun _ _ n -> n + 1) 0 let irrelevant_strings = ["irrelevant"; "not"; "important"] @@ -260,11 +263,7 @@ let slave = function List.filter (fun x -> not (List.mem x irrelevant_strings)) rest in (* Check that these fds are present *) - let path = "/proc/self/fd" in - let raw = fd_list () in - let pairs = - List.map (fun x -> (x, Unix.readlink (Filename.concat path x))) raw - in + let pairs = fd_list () in (* Filter any of stdin,stdout,stderr which have been mapped to /dev/null *) let filtered = List.filter