From ca3b66d9cc60bb0460bb3e1f4583299f3c7e496b Mon Sep 17 00:00:00 2001 From: Reilly Brogan Date: Tue, 19 Dec 2023 15:53:39 -0600 Subject: [PATCH] Fix SourceForge URLs SourceForge does a lot of redirects to the final download destination and the Golang HTTP client doesn't seem to handle this very well out of the box. We can fix this by stripping the Referer header from all HTTP requests following the redirects. I wasn't able to make this work by adding it to the grab HTTP client (probably something I needed to do with the request body) but that's easily worked around by just doing a HEAD request in the first place and following redirects until we get to the final download URL, which we can pass to grab. Signed-off-by: Reilly Brogan --- builder/source/simple.go | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/builder/source/simple.go b/builder/source/simple.go index e123d41..0789a31 100644 --- a/builder/source/simple.go +++ b/builder/source/simple.go @@ -25,6 +25,7 @@ import ( "net/url" "os" "path/filepath" + "strings" "time" "github.com/cavaliergopher/grab/v3" @@ -125,13 +126,46 @@ func (s *SimpleSource) IsFetched() bool { // download downloads simple files using go grab. func (s *SimpleSource) download(destination string) error { - req, err := grab.NewRequest(destination, s.URI) + // Some web servers (*cough* sourceforge) have strange redirection behavior. It's possible to work around this by clearing the Referer header on every redirect + headHttpClient := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + for k := range req.Header { + if strings.ToLower(k) == "referer" { + delete(req.Header, k) + } + } + return nil + }, + Transport: &http.Transport{ + DisableCompression: true, + Proxy: http.ProxyFromEnvironment, + }, + } + + // Do a HEAD request, following all redirects until we get the final URL. 
+ headResp, err := headHttpClient.Head(s.URI) + if err != nil { + return err + } + defer headResp.Body.Close() + + finalURL := headResp.Request.URL.String() + if s.URI != finalURL { + slog.Info("Source URL redirected", "uri", finalURL) + } + + req, err := grab.NewRequest(destination, finalURL) if err != nil { return err } // Indicate that we will accept any response content-type. Some servers will fail without this (like netfilter.org) - req.HTTPRequest.Header.Add("Accept", `*/*`) + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept#sect1 + req.HTTPRequest.Header.Add("Accept", "*/*") + + // Request content without modification or compression + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#identity + req.HTTPRequest.Header.Add("Accept-Encoding", "identity") // Ensure the checksum matches if !s.legacy { @@ -146,6 +180,8 @@ func (s *SimpleSource) download(destination string) error { // Create a client with compression disabled. // See: https://github.com/cavaliergopher/grab/blob/v3.0.1/v3/client.go#L53 client := &grab.Client{ + // To be fully compliant with the User-Agent spec we need to include the version + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent UserAgent: "solbuild/" + util.SolbuildVersion, HTTPClient: &http.Client{ Transport: &http.Transport{