Skip to content

Commit

Permalink
Fix SourceForge URLs
Browse files Browse the repository at this point in the history
SourceForge performs a lot of redirects before reaching the final download destination, and the Go HTTP client doesn't seem to handle this very well out of the box. We can fix this by stripping the Referer header from every HTTP request made while following the redirects.

I wasn't able to make this work by adding the redirect handling to the grab HTTP client (probably because of something I needed to do with the request body). That's easily worked around, though: we do a HEAD request first, follow the redirects until we reach the final download URL, and then pass that URL to grab.

Signed-off-by: Reilly Brogan <[email protected]>
  • Loading branch information
ReillyBrogan committed Dec 19, 2023
1 parent dca6dc0 commit ca3b66d
Showing 1 changed file with 38 additions and 2 deletions.
40 changes: 38 additions & 2 deletions builder/source/simple.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"net/url"
"os"
"path/filepath"
"strings"
"time"

"github.com/cavaliergopher/grab/v3"
Expand Down Expand Up @@ -125,13 +126,46 @@ func (s *SimpleSource) IsFetched() bool {

// download downloads simple files using go grab.
func (s *SimpleSource) download(destination string) error {
req, err := grab.NewRequest(destination, s.URI)
// Some web servers (*cough* sourceforge) have strange redirection behavior. It's possible to work around this by clearing the Referer header on every redirect
headHttpClient := &http.Client{
CheckRedirect: func(req *http.Request, via []*http.Request) error {
for k := range req.Header {
if strings.ToLower(k) == "referer" {
delete(req.Header, k)
}
}
return nil
},
Transport: &http.Transport{
DisableCompression: true,
Proxy: http.ProxyFromEnvironment,
},
}

// Do a HEAD request, following all redirects until we get the final URL.
headResp, err := headHttpClient.Head(s.URI)
if err != nil {
return err
}
defer headResp.Body.Close()

finalURL := headResp.Request.URL.String()
if s.URI != finalURL {
slog.Info("Source URL redirected", "uri", finalURL)
}

req, err := grab.NewRequest(destination, finalURL)
if err != nil {
return err
}

// Indicate that we will accept any response content-type. Some servers will fail without this (like netfilter.org)
req.HTTPRequest.Header.Add("Accept", `*/*`)
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept#sect1
req.HTTPRequest.Header.Add("Accept", "*/*")

// Request content without modification or compression
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#identity
req.HTTPRequest.Header.Add("Accept-Encoding", "identity")

// Ensure the checksum matches
if !s.legacy {
Expand All @@ -146,6 +180,8 @@ func (s *SimpleSource) download(destination string) error {
// Create a client with compression disabled.
// See: https://github.com/cavaliergopher/grab/blob/v3.0.1/v3/client.go#L53
client := &grab.Client{
// To be fully compliant with the User-Agent spec we need to include the version
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
UserAgent: "solbuild/" + util.SolbuildVersion,
HTTPClient: &http.Client{
Transport: &http.Transport{
Expand Down

0 comments on commit ca3b66d

Please sign in to comment.