From 0c7927e2d3611290d8ccf06932af9127f546b3d6 Mon Sep 17 00:00:00 2001
From: Dave Dykstra <2129743+DrDaveD@users.noreply.github.com>
Date: Wed, 23 Oct 2024 17:44:41 -0500
Subject: [PATCH] process the LINK: header

---
 cvmfs/network/download.cc | 94 +++++++++++++++++++++++++++++++++++----
 cvmfs/network/download.h  |  1 +
 2 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/cvmfs/network/download.cc b/cvmfs/network/download.cc
index 00a4bf8637..9ef3f139c8 100644
--- a/cvmfs/network/download.cc
+++ b/cvmfs/network/download.cc
@@ -226,7 +226,12 @@ static size_t CallbackCurlHeader(void *ptr, size_t size, size_t nmemb,
     // This is metalink info
     LogCvmfs(kLogDownload, kLogDebug, "(id %" PRId64 ") %s", info->id(),
              header_line.c_str());
-    info->SetLink(header_line.substr(5));
+    std::string link = info->link();
+    if (link.size() != 0)
+      link = link + ", " + header_line.substr(5);
+    else
+      link = header_line.substr(5);
+    info->SetLink(link);
   } else if (HasPrefix(header_line, "X-SQUID-ERROR:", true)) {
     // Reinterpret host error as proxy error
     if (info->error_code() == kFailHostHttp) {
@@ -1399,6 +1404,70 @@ void DownloadManager::ReleaseCredential(JobInfo *info) {
 }
 
 
+/* Orders by ascending "pri="; a link lacking pri= counts as 999999 */
+static bool sortlinks(const std::string &s1, const std::string &s2) {
+  int pri1 = 999999, pri2 = 999999;  // rfc6249 value for an undefined pri
+  const size_t pos1 = s1.rfind("pri="), pos2 = s2.rfind("pri=");  // past URL
+  if (pos1 != std::string::npos) sscanf(s1.c_str() + pos1, "pri=%d", &pri1);
+  if (pos2 != std::string::npos) sscanf(s2.c_str() + pos2, "pri=%d", &pri2);
+  return pri1 < pri2;  // strict weak ordering, as std::sort requires
+}
+
+/**
+ * Parses Link header and uses it to set a new host chain.
+ * See rfc6249.
+ */
+void DownloadManager::ProcessLink(JobInfo *info) {
+  // info->link() holds comma-separated links; order them via sortlinks
+  std::vector<std::string> links = SplitString(info->link(), ",");
+  std::sort(links.begin(), links.end(), sortlinks);
+
+  std::vector<std::string> host_list;  // URLs of the accepted links, in order
+
+  std::vector<std::string>::const_iterator il = links.begin();
+  for (; il != links.end(); ++il) {
+    const std::string &link = *il;
+    if ((link.find("; rel=duplicate") == std::string::npos) &&
+        (link.find("; rel=\"duplicate\"") == std::string::npos)) {
+      LogCvmfs(kLogDownload, kLogDebug,
+        "skipping link '%s' because it does not contain rel=duplicate",
+        link.c_str());
+      continue;
+    }
+    // ignore depth= field since there's nothing useful we can do with it
+
+    const size_t leftbracket = link.find('<');  // the URL sits inside <...>
+    if (leftbracket == std::string::npos) {
+      LogCvmfs(kLogDownload, kLogDebug,
+        "skipping link '%s' because it does not have a left angle bracket",
+        link.c_str());
+      continue;
+    }
+    const size_t rightbracket = link.find('>');
+    if (rightbracket == std::string::npos) {
+      LogCvmfs(kLogDownload, kLogDebug,
+        "skipping link '%s' because it does not have a right angle bracket",
+        link.c_str());
+      continue;
+    }
+    if (leftbracket > rightbracket) {
+      LogCvmfs(kLogDownload, kLogDebug,
+        "skipping link '%s' because it angle brackets are out of order",
+        link.c_str());
+      continue;
+    }
+    std::string host = link.substr(leftbracket+1, rightbracket-leftbracket-1);
+    LogCvmfs(kLogDownload, kLogDebug, "adding linked host '%s'", host.c_str());
+    host_list.push_back(host);
+  }
+
+  if (!host_list.empty()) {  // keep the current host chain if nothing usable
+    SetHostChain(host_list);
+    opt_metalink_timestamp_link_ = time(NULL);  // when this chain was set
+  }
+}
+
+
 /**
  * Checks the result of a curl download and implements the failure logic, such
  * as changing the proxy server. Takes care of cleanup.
@@ -1412,6 +1481,21 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { info->proxy().c_str(), curl_error); UpdateStatistics(info->curl_handle()); + bool was_metalink; + std::string typ; + if (info->current_metalink_chain_index() >= 0) { + was_metalink = true; + typ = "metalink"; + if (info->link() != "") { + // process Link header whether or not the redirected URL got an error + ProcessLink(info); + } + } else { + was_metalink = false; + typ = "host"; + } + + // Verification and error classification switch (curl_error) { case CURLE_OK: @@ -1512,18 +1596,12 @@ bool DownloadManager::VerifyAndFinalize(const int curl_error, JobInfo *info) { break; } - bool was_metalink; - std::string typ; std::vector *host_chain; unsigned char num_used_hosts; - if (info->current_metalink_chain_index() >= 0) { - was_metalink = true; - typ = "metalink"; + if (was_metalink) { host_chain = opt_metalink_.chain; num_used_hosts = info->num_used_metalinks(); } else { - was_metalink = false; - typ = "host"; host_chain = opt_host_.chain; num_used_hosts = info->num_used_hosts(); } diff --git a/cvmfs/network/download.h b/cvmfs/network/download.h index f83b2019bb..826851a71d 100644 --- a/cvmfs/network/download.h +++ b/cvmfs/network/download.h @@ -287,6 +287,7 @@ class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding) void Backoff(JobInfo *info); void SetNocache(JobInfo *info); void SetRegularCache(JobInfo *info); + void ProcessLink(JobInfo *info); bool VerifyAndFinalize(const int curl_error, JobInfo *info); void InitHeaders(); void CloneProxyConfig(DownloadManager *clone);