From c275e6c9e8b69c1d671d3fcab26f40e41b64e0ee Mon Sep 17 00:00:00 2001 From: Reilly Brogan Date: Tue, 19 Dec 2023 15:53:39 -0600 Subject: [PATCH] Fix SourceForge URLs SourceForge performs many redirects before reaching the final download destination, and the Go HTTP client doesn't seem to handle this very well out of the box. We can fix this by stripping the Referer header from all HTTP requests following the redirects. I wasn't able to make this work by adding this to the grab HTTP client (probably something I needed to do with the request body), but that's easily worked around by doing a HEAD request first and following redirects until we reach the final download URL, which we can then pass to grab. Signed-off-by: Reilly Brogan --- builder/source/simple.go | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/builder/source/simple.go b/builder/source/simple.go index e123d41..228da1d 100644 --- a/builder/source/simple.go +++ b/builder/source/simple.go @@ -25,6 +25,7 @@ import ( "net/url" "os" "path/filepath" + "strings" "time" "github.com/cavaliergopher/grab/v3" @@ -125,13 +126,46 @@ func (s *SimpleSource) IsFetched() bool { // download downloads simple files using go grab. func (s *SimpleSource) download(destination string) error { - req, err := grab.NewRequest(destination, s.URI) + + // Some web servers (*cough* sourceforge) have strange redirection behavior. It's possible to work around this by clearing the Referer header on every redirect + headHttpClient := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + for k, _ := range req.Header { + if strings.ToLower(k) == "referer" { + delete(req.Header, k) + } + } + return nil + }, + Transport: &http.Transport{ + DisableCompression: true, + Proxy: http.ProxyFromEnvironment, + }, + } + + // Do a HEAD request, following all redirects until we get the final URL. 
+ headResp, err := headHttpClient.Head(s.URI) + if err != nil { + return err + } + + finalURL := headResp.Request.URL.String() + if s.URI != finalURL { + slog.Info("Source URL redirected", "uri", finalURL) + } + + req, err := grab.NewRequest(destination, finalURL) if err != nil { return err } // Indicate that we will accept any response content-type. Some servers will fail without this (like netfilter.org) - req.HTTPRequest.Header.Add("Accept", `*/*`) + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept#sect1 + req.HTTPRequest.Header.Add("Accept", "*/*") + + // Request content without modification or compression + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#identity + req.HTTPRequest.Header.Add("Accept-Encoding", "identity") // Ensure the checksum matches if !s.legacy { @@ -146,6 +180,8 @@ func (s *SimpleSource) download(destination string) error { // Create a client with compression disabled. // See: https://github.com/cavaliergopher/grab/blob/v3.0.1/v3/client.go#L53 client := &grab.Client{ + // To be fully compliant with the User-Agent spec we need to include the version + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent UserAgent: "solbuild/" + util.SolbuildVersion, HTTPClient: &http.Client{ Transport: &http.Transport{