diff --git a/master_changes.md b/master_changes.md index 2efe7e53f99..b9850f1008d 100644 --- a/master_changes.md +++ b/master_changes.md @@ -62,6 +62,11 @@ users) ## Repository * Accurately tag `curl` download command when loaded from global config file [#6270 @rjbou] + * Remove wget support for Software Heritage fallback [#6036 @rjbou] + * [BUG] Fix SWH archive cooking request for wget [#6036 @rjbou - fix #5721] + * [BUG] Fix SWH liveness check [#6036 @rjbou] + * Update SWH API request [#6036 @rjbou] + * Rework SWH fallback to have a more correct archive retrieval and more fine grained error handling [#6036 @rjbou] ## Lock @@ -81,6 +86,7 @@ users) ## VCS ## Build + * Add `re` as a dependency of `opam-repository` [#6036 @rjbou] ## Infrastructure @@ -157,6 +163,9 @@ users) * `OpamArg.InvalidCLI`: export exception [#6150 @rjbou] ## opam-repository + * `OpamDownload.get_output`: fix `wget` option for `POST` requests [#6036 @rjbou] + * `OpamDownload.get_output`: use long form for `curl` `POST` request option [#6036 @rjbou] + * `OpamDownload.download`: more fine grained HTTP request error code detection for curl [#6036 @rjbou] ## opam-state diff --git a/opam-repository.opam b/opam-repository.opam index 96505f6887c..442c22d41e1 100644 --- a/opam-repository.opam +++ b/opam-repository.opam @@ -31,4 +31,5 @@ depends: [ "ocaml" {>= "4.08.0"} "opam-format" {= version} "dune" {>= "2.8.0"} + "re" {>= "1.10.0"} ] diff --git a/src/repository/dune b/src/repository/dune index cb622b218b4..5a94881525c 100644 --- a/src/repository/dune +++ b/src/repository/dune @@ -3,7 +3,7 @@ (public_name opam-repository) (synopsis "OCaml Package Manager remote repository handling library") ; TODO: Remove (re_export ...) when CI uses the OCaml version that includes https://github.com/ocaml/ocaml/pull/11989 - (libraries (re_export opam-format)) + (libraries (re_export opam-format) re) (flags (:standard (:include ../ocaml-flags-standard.sexp) (:include ../ocaml-flags-configure.sexp) diff --git a/src/repository/opamDownload.ml b/src/repository/opamDownload.ml index 633f43762b4..890bc5971b3 100644 --- a/src/repository/opamDownload.ml +++ b/src/repository/opamDownload.ml @@ -37,6 +37,7 @@ let curl_args = (* --fail is as old as curl; though the assumption that it leads to exit code 22 when there's an error is probably 5.3 21-Dec-1998 (prior to that it led to exit code 21) *) + (CString "--show-headers", None) :: (CString "--fail", None) :: main_args else (CString "--write-out", None) :: @@ -53,6 +54,7 @@ let wget_args = [ CString "--header=Accept: */*", None; CString "-t", None; CIdent "retry", None; CString "-O", None; CIdent "out", None; + CString "--server-response", None; (* Get the HTTP responde to parse error code *) CString "-U", None; user_agent, None; CString "--", None; (* End list of options *) CIdent "url", None; @@ -129,57 +131,111 @@ let download_command_t ~with_curl_mitigation ~compress ?checksum ~url ~dst c = OpamSystem.make_command ~allow_stdin:false ~stdout cmd args @@> c let tool_return redownload_command url ret = + let code_from_header std = + let re = + Re.(compile @@ seq [ + bos; rep space; + str "HTTP/"; rep1 @@ diff any space; space; + group @@ repn digit 3 (Some 3) + ]) in + List.filter_map (fun l -> + try + let r = + Re.(Group.get (exec re l) 1) + in + let code = int_of_string r in + if code < 400 then None else Some code + with Failure _ -> None + | Not_found -> None + ) std + in + let open OpamProcess in match Lazy.force OpamRepositoryConfig.(!r.download_tool) with + | [(CIdent "wget"), _], `Default -> + (match ret.OpamProcess.r_code with + | 0 -> (* Http respose < 400 *) + Done `ok + | 8 -> (* Http response > 400 *) + let http_answers = + code_from_header ret.OpamProcess.r_stderr + in + (match http_answers with + | code::_ -> Done (`http_error code) + | [] -> + Done (`fail (Some "wget empty response", + Printf.sprintf "curl: empty response while downloading %s" + (OpamUrl.to_string url)))) + | _ -> (* Another error *) + (* 1 Generic error code. + 2 Parse error---for instance, when parsing command-line options + 3 File I/O error. + 4 Network failure. + 5 SSL verification failure. + 6 Username/password authentication failure. + 7 Protocol errors. *) + Done (`fail (Some "wget command error", + Printf.sprintf "wget error: %s" + (OpamProcess.result_summary ret)))) | _, `Default -> if OpamProcess.is_failure ret then - fail (Some "Download command failed", - Printf.sprintf "Download command failed: %s" - (OpamProcess.result_summary ret)) - else Done () + Done (`fail (Some "Download command failed", + Printf.sprintf "Download command failed: %s" + (OpamProcess.result_summary ret))) + else Done `ok | _, `Curl -> - if OpamProcess.is_failure ret then - if ret.r_code = 43 then begin - (* Code 43 is CURLE_BAD_FUNCTION_ARGUMENT (7.1 7-Aug-2000). This should - never be encountered using the curl binary, so we assume that it's - a manifestation of curl/curl#13845 (see also #6120). *) - log "Attempting to mitigate curl/curl#13845"; - redownload_command ~with_curl_mitigation:true @@ function ret -> - if OpamProcess.is_failure ret then - if ret.r_code = 22 then - (* If this broken version of curl persists for some time, it is - relatively straightforward to parse the http response code from - the message, as it hasn't changed. *) - fail (Some "curl failed owing to a server-side issue", - Printf.sprintf "curl failed with server-side error: %s" - (OpamProcess.result_summary ret)) - else - fail (Some "curl failed", - Printf.sprintf "curl failed: %s" - (OpamProcess.result_summary ret)) - else Done () - end else - fail (Some "curl failed", Printf.sprintf "curl failed: %s" - (OpamProcess.result_summary ret)) - else - match ret.OpamProcess.r_stdout with - | [] -> - fail (Some "curl empty response", - Printf.sprintf "curl: empty response while downloading %s" - (OpamUrl.to_string url)) - | l -> - let code = List.hd (List.rev l) in - let num = try int_of_string code with Failure _ -> 999 in - if num >= 400 then - fail (Some ("curl error code " ^ code), - Printf.sprintf "curl: code %s while downloading %s" - code (OpamUrl.to_string url)) - else Done () + match ret with + | { r_code = 0 ; r_stdout = []; _ } -> + Done (`fail (Some "curl empty response", + Printf.sprintf "curl: empty response while downloading %s" + (OpamUrl.to_string url))) + | { r_code = 0 ; r_stdout = (_::_ as l); _ } -> + let code = List.hd (List.rev l) in + let num = try int_of_string code with Failure _ -> 999 in + if num >= 400 then + Done (`http_error num) + else Done `ok + | { r_code = 43; _ } -> + (* Code 43 is CURLE_BAD_FUNCTION_ARGUMENT (7.1 7-Aug-2000). This should + never be encountered using the curl binary, so we assume that it's + a manifestation of curl/curl#13845 (see also #6120). *) + log "Attempting to mitigate curl/curl#13845"; + (redownload_command ~with_curl_mitigation:true @@ function ret -> + if OpamProcess.is_failure ret then + if ret.r_code = 22 then + match code_from_header ret.r_stdout with + | code::_ -> Done (`http_error code) + | [] -> + (* If this broken version of curl persists for some time, it is + relatively straightforward to parse the http response code from + the message, as it hasn't changed. *) + Done (`fail (Some "curl failed owing to a server-side issue", + Printf.sprintf "curl failed with server-side error: %s" + (OpamProcess.result_summary ret))) + else + Done (`fail (Some "curl failed", + Printf.sprintf "curl failed: %s" + (OpamProcess.result_summary ret))) + else Done `ok) + | _ -> (* code <> 0 / 43 *) + Done (`fail (Some "curl failed", + Printf.sprintf "curl failed: %s" + (OpamProcess.result_summary ret))) -let download_command ~compress ?checksum ~url ~dst () = +let download_command_http_error ~compress ?checksum ~url ~dst () = let download_command = download_command_t ~compress ?checksum ~url ~dst in download_command ~with_curl_mitigation:false @@ tool_return download_command url +let download_command ~compress ?checksum ~url ~dst () = + download_command_http_error ~compress ?checksum ~url ~dst () + @@| function + | `ok -> () + | `http_error code -> + fail (Some ("HTTP error code " ^ string_of_int code), + Printf.sprintf "code %d while downloading %s" + code (OpamUrl.to_string url)) + | `fail (s,l) -> fail (s,l) + let really_download ?(quiet=false) ~overwrite ?(compress=false) ?checksum ?(validate=true) ~url ~dst () = @@ -261,6 +317,7 @@ let check_post_tool () = let get_output ~post ?(args=[]) url = let cmd_args = + (* should we read from output or redirect in a file ? *) download_args ~url ~out:"-" ~retry:OpamRepositoryConfig.(!r.retries) ~compress:false () @ args @@ -268,7 +325,8 @@ let get_output ~post ?(args=[]) url = let cmd_args = if post then match cmd_args with - | ("wget"|"curl" as cmd)::args -> Some (cmd :: ["-X"; "POST"] @ args) + | ("curl" as cmd)::args -> Some (cmd :: ["--request"; "POST"] @ args) + | ("wget" as cmd)::args -> Some (cmd :: ["--method"; "POST"] @ args) | _ -> None else Some cmd_args in @@ -283,13 +341,25 @@ module SWHID = struct let instance = OpamUrl.of_string "https://archive.softwareheritage.org" (* we keep api 1 hardcoded for the moment *) - let full_url middle hash = OpamUrl.Op.(instance / "api" / "1" / middle / hash / "") + let full_url middle hash = + OpamUrl.Op.(instance / "api" / "1" / middle / hash / "") + let vault_url kind hash = + full_url ("vault/" ^ kind) ("swh:1:dir:" ^ hash) - let check_liveness () = - OpamProcess.Job.catch (fun _ -> Done false) - @@ fun () -> - get_output ~post:true OpamUrl.Op.(instance / "api" / "1" / "ping" / "") - @@| fun _ -> true + let fallback_err fmt = Printf.sprintf ("SWH fallback: "^^fmt) + + let get_output ?(post=false) url = + get_output ~post url @@| function + | Some out -> out + | None -> + (* Shouldn't happen, we already checked that a post tool is used *) + (* XXX change to an assert false ? *) + OpamConsole.error "Software Heritage fallback needs %s or %s installed" + (OpamConsole.colorise `underline "curl") + (OpamConsole.colorise `underline "wget"); + fail (None, + "Software Heritage fallback not available as \ + it needs curl or wget used") let get_value key s = match OpamJson.of_string s with @@ -299,6 +369,89 @@ module SWHID = struct | _ -> None) | _ -> None + let check_liveness () = + OpamProcess.Job.catch (fun _ -> Done false) + @@ fun () -> + get_output ~post:false OpamUrl.Op.(instance / "api" / "1" / "ping" / "") + @@| function + | pong::_ -> + (* curl output after answering the http code *) + (* https://archive.softwareheritage.org/api/1/ping/ *) + OpamStd.String.starts_with ~prefix:"\"pong\"" pong + | _ -> false + + (* + Returned error JSONs + { + "error":"Resource not found", + "reason":"The resource /api/1/vault/flat/swh:1:dir:6b700f4b287aee509adbc723d030309188684f4/ could not be found on the server." + } + { + "exception":"NotFoundExc", + "reason":"Cooking of swh:1:dir:6b700f4b287aee509adbc723d030309188684f04 was never requested." + } + { + "exception":"NotFoundExc", + "reason":"swh:1:dir:0000000000000000000000000000000000000000 not found." + } + *) + let parse_err json = + match get_value "exception" json with + | Some "NotFoundExc" -> + (match get_value "reason" json with + | Some reason -> + if OpamStd.String.ends_with ~suffix:"was never requested." reason then + `Uncooked + else if OpamStd.String.ends_with ~suffix:"not found." reason then + `Not_found + else `Error + | None -> `Error) + | Some "Resource not found" -> `Not_found + | Some _ | None -> `Error + + let is_it_cooked url = + let dst = OpamSystem.temp_file ~auto_clean:false "swh-out" in + let download_cmd ~with_curl_mitigation return = + let cmd, args = + match + download_args ~url ~out:dst + ~with_curl_mitigation + ~retry:OpamRepositoryConfig.(!r.retries) + ~compress:false () + with + | "curl" as cmd::args -> cmd, args + | "wget" as cmd::args -> cmd, "--content-on-error"::args + | _ -> assert false + in + let stdout = OpamSystem.temp_file ~auto_clean:false "dl" in + OpamProcess.Job.finally (fun () -> OpamSystem.remove_file stdout) + @@ fun () -> + OpamSystem.make_command ~allow_stdin:false ~stdout cmd args + @@> return + in + (download_cmd ~with_curl_mitigation:false + @@ tool_return download_cmd url) + @@| fun status -> + let read_last_line file = + try + OpamStd.String.split (OpamSystem.read file) '\n' + |> List.rev + |> List.hd + with Failure _ -> "" + in + let status = + match status with + | `ok -> + let json = read_last_line dst in + if String.equal json "" then `Error else `Cooked json + | `http_error 404 -> + let json = read_last_line dst in + parse_err json + | `http_error _ | `fail _ -> `Error + in + OpamSystem.remove_file dst; (* TODO XXX and in case of error raised ? *) + status + (* SWH request output example directory: retrieve "status" & "fetch_url" $ curl https://archive.softwareheritage.org/api/1/vault/directory/4453cfbdab1a996658cd1a815711664ee7742380/ @@ -313,51 +466,60 @@ module SWHID = struct } *) - let get_output ?(post=false) url = - get_output ~post url @@| function - | Some out -> - Some (String.concat "" out) - | None -> - OpamConsole.error "Software Heritage fallback needs %s or %s installed" - (OpamConsole.colorise `underline "curl") - (OpamConsole.colorise `underline "wget"); - None - - let get_dir hash = - let url = full_url "vault/directory" hash in - get_output ~post:true url @@| OpamStd.Option.replace @@ fun json -> + let read_flat_out json = let status = get_value "status" json in let fetch_url = get_value "fetch_url" json in match status, fetch_url with - | None, _ | _, None -> None + | None, _ | _, None -> + (match parse_err json with + | `Not_found -> `Not_found + | `Error | `Uncooked -> `Malformed) | Some status, Some fetch_url -> - Some (match status with - | "done" -> `Done (OpamUrl.of_string fetch_url) - | "pending" -> `Pending - | "new" -> `New - | "failed" -> `Failed - | _ -> `Unknown) - - let fallback_err fmt = Printf.sprintf ("SWH fallback: "^^fmt) + match status with + | "done" -> `Done (OpamUrl.of_string fetch_url) + | "pending" -> `Pending + | "new" -> `New + | "failed" -> `Failed + | _ -> `Unknown let get_url ?(max_tries=6) swhid = - let attempts = max_tries in + let request_cooking ?(post=false) url = + get_output ~post url @@| fun out -> String.concat "" out + in let hash = OpamSWHID.hash swhid in - let rec aux max_tries = - if max_tries <= 0 then - Done (Not_available - (Some (fallback_err "max_tries"), - fallback_err "%d attempts tried; aborting" attempts)) - else - get_dir hash @@+ function - | Some (`Done fetch_url) -> Done (Result fetch_url) - | Some (`Pending | `New) -> - Unix.sleep 10; - aux (max_tries - 1) - | None | Some (`Failed | `Unknown) -> - Done (Not_available (None, fallback_err "Unknown swhid")) + (* https://archive.softwareheritage.org/api/1/vault/flat/doc/ *) + let url = vault_url "flat" hash in + let rec loop attempt json = + match read_flat_out json with + | `Done fetch_url -> Done (Result fetch_url) + | `Pending | `New -> + log "%s is cooking (%d/%d)..." + (OpamSWHID.to_string swhid) attempt max_tries; + if (attempt : int) >= (max_tries : int) then + Done (Not_available + (Some (fallback_err "attempt"), + fallback_err "%d attempts tried; aborting" max_tries)) + else + (Unix.sleep 10; + request_cooking ~post:false url + @@+ loop (attempt + 1)) + | `Malformed -> + Done (Not_available (None, fallback_err "Malformed request answer")) + | `Failed | `Unknown | `Not_found -> + Done (Not_available (None, fallback_err "Unknown swhid")) in - aux max_tries + let retrieve_url json = loop 1 json in + is_it_cooked url + @@+ function + | `Error -> Done (Not_available (None, fallback_err "Request error")) + | `Not_found -> Done (Not_available (None, fallback_err "Unknown swhid")) + | `Cooked json -> + log "%s is cooked or cooking, requesting url" (OpamSWHID.to_string swhid); + retrieve_url json + | `Uncooked -> + log "%s is uncooked, request cooking" (OpamSWHID.to_string swhid); + request_cooking ~post:true url + @@+ retrieve_url (* for the moment only used in sources, not extra sources or files *) let archive_fallback ?max_tries urlf dirnames = @@ -367,58 +529,64 @@ module SWHID = struct if check_post_tool () then check_liveness () @@+ fun alive -> if alive then - (* Add a global modifier and/or command for default answering *) - if OpamConsole.confirm ~default:false - "Source %s is not available. Do you want to try to retrieve it \ - from Software Heritage cache (https://www.softwareheritage.org)? \ - It may take few minutes." - (OpamConsole.colorise `underline - (OpamUrl.to_string (OpamFile.URL.url urlf))) then - (log "SWH fallback for %s" - (OpamUrl.to_string (OpamFile.URL.url urlf)); - get_url ?max_tries swhid @@+ function - | Not_available _ as error -> Done error - | Up_to_date _ -> assert false - | Result url -> - let hash = OpamSWHID.hash swhid in - OpamFilename.with_tmp_dir_job @@ fun dir -> - let archive = OpamFilename.Op.(dir // hash) in - download_as ~overwrite:true url archive @@+ fun () -> - let sources = OpamFilename.Op.(dir / "src") in - OpamFilename.extract_job archive sources @@| function - | Some e -> - Not_available ( - Some (fallback_err "archive extraction failure"), - fallback_err "archive extraction failure %s" - (match e with - | Failure s -> s - | OpamSystem.Process_error pe -> - OpamProcess.string_of_result pe - | e -> Printexc.to_string e)) - | None -> - (match OpamSWHID.compute sources with - | None -> - Not_available ( - Some (fallback_err "can't check archive validity"), - fallback_err - "error on swhid computation, can't check its validity") - | Some computed -> - if String.equal computed hash then - (List.iter (fun (_nv, dst, _sp) -> - (* add a text *) - OpamFilename.copy_dir ~src:sources ~dst) - dirnames; - Result (Some "SWH fallback")) - else - Not_available ( - Some (fallback_err "archive not valid"), - fallback_err - "archive corrupted, opam file swhid %S vs computed %S" - hash computed))) - else - Done (Not_available - (Some (fallback_err "skip retrieval"), - fallback_err "retrieval refused by user")) + (log "API is working"; + (* Add a global modifier and/or command for default answering *) + if OpamConsole.confirm ~default:false + "Source %s is not available. Do you want to try to retrieve it \ + from Software Heritage cache (https://www.softwareheritage.org)? \ + It may take few minutes." + (OpamConsole.colorise `underline + (OpamUrl.to_string (OpamFile.URL.url urlf))) then + (log "SWH fallback for %s with %s" + (OpamStd.Format.pretty_list + (List.map (fun (nv,_,_) -> nv) dirnames)) + (OpamSWHID.to_string swhid); + get_url ?max_tries swhid @@+ function + | Not_available _ as error -> Done error + | Up_to_date _ -> assert false + | Result url -> + log "Downloading %s for %s" (OpamSWHID.to_string swhid) + (OpamStd.Format.pretty_list + (List.map (fun (nv,_,_) -> nv) dirnames)); + let hash = OpamSWHID.hash swhid in + OpamFilename.with_tmp_dir_job @@ fun dir -> + let archive = OpamFilename.Op.(dir // hash) in + download_as ~overwrite:true url archive @@+ fun () -> + let sources = OpamFilename.Op.(dir / "src") in + OpamFilename.extract_job archive sources @@| function + | Some e -> + Not_available ( + Some (fallback_err "archive extraction failure"), + fallback_err "archive extraction failure %s" + (match e with + | Failure s -> s + | OpamSystem.Process_error pe -> + OpamProcess.string_of_result pe + | e -> Printexc.to_string e)) + | None -> + (match OpamSWHID.compute sources with + | None -> + Not_available ( + Some (fallback_err "can't check archive validity"), + fallback_err + "error on swhid computation, can't check its validity") + | Some computed -> + if String.equal computed hash then + (List.iter (fun (_nv, dst, _sp) -> + (* add a text *) + OpamFilename.copy_dir ~src:sources ~dst) + dirnames; + Result (Some "SWH fallback")) + else + Not_available ( + Some (fallback_err "archive not valid"), + fallback_err + "archive corrupted, opam file swhid %S vs computed %S" + hash computed))) + else + Done (Not_available + (Some (fallback_err "skip retrieval"), + fallback_err "retrieval refused by user"))) else Done (Not_available (Some (fallback_err "unreachable"),