Skip to content

Commit

Permalink
use sync.OnceValue for various regular expressions, require go1.21
Browse files Browse the repository at this point in the history
Using regexp.MustCompile at package level consumes a significant amount of
memory when importing the package, even if those regular expressions are never used.

This changes the compilation of these regular expressions to use sync.OnceValue,
so that each one is only compiled the first time it is used.

Various other regular expressions are still compiled on import, but
these are exported, so changing them to a sync.OnceValue would be a
breaking change; we can still decide to do so, but that is left for
a follow-up.

Note that sync.OnceValue requires go1.21 or later, so this commit also raises
the minimum Go version accordingly.

Signed-off-by: Sebastiaan van Stijn <[email protected]>
  • Loading branch information
thaJeztah committed Jul 15, 2024
1 parent aa90dc5 commit 6d47b18
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 16 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module github.com/distribution/reference

go 1.20
go 1.21

require github.com/opencontainers/go-digest v1.0.0
4 changes: 2 additions & 2 deletions normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ type normalizedNamed interface {
// qualified reference. If the value may be an identifier
// use ParseAnyReference.
func ParseNormalizedNamed(s string) (Named, error) {
if ok := anchoredIdentifierRegexp.MatchString(s); ok {
if ok := anchoredIdentifierRegexp().MatchString(s); ok {
return nil, fmt.Errorf("invalid repository name (%s), cannot specify 64-byte hexadecimal strings", s)
}
domain, remainder := splitDockerDomain(s)
Expand Down Expand Up @@ -244,7 +244,7 @@ func TagNameOnly(ref Named) Named {
// ParseAnyReference parses a reference string as a possible identifier,
// full digest, or familiar name.
func ParseAnyReference(ref string) (Reference, error) {
if ok := anchoredIdentifierRegexp.MatchString(ref); ok {
if ok := anchoredIdentifierRegexp().MatchString(ref); ok {
return digestReference("sha256:" + ref), nil

Check failure on line 248 in normalize.go

View workflow job for this annotation

GitHub Actions / build (1.22.x, ubuntu-latest)

cannot convert "sha256:" + ref (value of type string) to type digestReference (typecheck)

Check failure on line 248 in normalize.go

View workflow job for this annotation

GitHub Actions / build (1.22.x, macos-latest)

cannot convert "sha256:" + ref (value of type string) to type digestReference (typecheck)
}
if dgst, err := digest.Parse(ref); err == nil {
Expand Down
10 changes: 5 additions & 5 deletions reference.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ func Path(named Named) (name string) {
// If no valid hostname is found, the hostname is empty and the full value
// is returned as name
func splitDomain(name string) (string, string) {
match := anchoredNameRegexp.FindStringSubmatch(name)
match := anchoredNameRegexp().FindStringSubmatch(name)
if len(match) != 3 {
return "", name
}
Expand All @@ -197,7 +197,7 @@ func Parse(s string) (Reference, error) {

var repo repository

nameMatch := anchoredNameRegexp.FindStringSubmatch(matches[1])
nameMatch := anchoredNameRegexp().FindStringSubmatch(matches[1])
if len(nameMatch) == 3 {
repo.domain = nameMatch[1]
repo.path = nameMatch[2]
Expand Down Expand Up @@ -248,7 +248,7 @@ func ParseNamed(s string) (Named, error) {
// WithName returns a named object representing the given string. If the input
// is invalid ErrReferenceInvalidFormat will be returned.
func WithName(name string) (Named, error) {
match := anchoredNameRegexp.FindStringSubmatch(name)
match := anchoredNameRegexp().FindStringSubmatch(name)
if match == nil || len(match) != 3 {
return nil, ErrReferenceInvalidFormat
}
Expand All @@ -266,7 +266,7 @@ func WithName(name string) (Named, error) {
// WithTag combines the name from "name" and the tag from "tag" to form a
// reference incorporating both the name and the tag.
func WithTag(name Named, tag string) (NamedTagged, error) {
if !anchoredTagRegexp.MatchString(tag) {
if !anchoredTagRegexp().MatchString(tag) {
return nil, ErrTagInvalidFormat
}
var repo repository
Expand All @@ -292,7 +292,7 @@ func WithTag(name Named, tag string) (NamedTagged, error) {
// WithDigest combines the name from "name" and the digest from "digest" to form
// a reference incorporating both the name and the digest.
func WithDigest(name Named, digest digest.Digest) (Canonical, error) {
if !anchoredDigestRegexp.MatchString(digest.String()) {
if !anchoredDigestRegexp().MatchString(digest.String()) {
return nil, ErrDigestInvalidFormat
}
var repo repository
Expand Down
17 changes: 13 additions & 4 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package reference
import (
"regexp"
"strings"
"sync"
)

// DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
Expand Down Expand Up @@ -111,11 +112,15 @@ var (

// anchoredTagRegexp matches valid tag names, anchored at the start and
// end of the matched string.
anchoredTagRegexp = regexp.MustCompile(anchored(tag))
anchoredTagRegexp = sync.OnceValue(func() *regexp.Regexp {
return regexp.MustCompile(anchored(tag))
})

// anchoredDigestRegexp matches valid digests, anchored at the start and
// end of the matched string.
anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat))
anchoredDigestRegexp = sync.OnceValue(func() *regexp.Regexp {
return regexp.MustCompile(anchored(digestPat))
})

// pathComponent restricts path-components to start with an alphanumeric
// character, with following parts able to be separated by a separator
Expand All @@ -131,13 +136,17 @@ var (

// anchoredNameRegexp is used to parse a name value, capturing the
// domain and trailing components.
anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName)))
anchoredNameRegexp = sync.OnceValue(func() *regexp.Regexp {
return regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName)))
})

referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))

// anchoredIdentifierRegexp is used to check or match an
// identifier value, anchored at start and end of string.
anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier))
anchoredIdentifierRegexp = sync.OnceValue(func() *regexp.Regexp {
return regexp.MustCompile(anchored(identifier))
})
)

// optional wraps the expression in a non-capturing group and makes the
Expand Down
8 changes: 4 additions & 4 deletions regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ func TestDomainRegexp(t *testing.T) {

func TestFullNameRegexp(t *testing.T) {
t.Parallel()
if anchoredNameRegexp.NumSubexp() != 2 {
if anchoredNameRegexp().NumSubexp() != 2 {
t.Fatalf("anchored name regexp should have two submatches: %v, %v != 2",
anchoredNameRegexp, anchoredNameRegexp.NumSubexp())
anchoredNameRegexp(), anchoredNameRegexp().NumSubexp())
}

tests := []regexpMatch{
Expand Down Expand Up @@ -469,7 +469,7 @@ func TestFullNameRegexp(t *testing.T) {
tc := tc
t.Run(tc.input, func(t *testing.T) {
t.Parallel()
checkRegexp(t, anchoredNameRegexp, tc)
checkRegexp(t, anchoredNameRegexp(), tc)
})
}
}
Expand Down Expand Up @@ -580,7 +580,7 @@ func TestIdentifierRegexp(t *testing.T) {
tc := tc
t.Run(tc.input, func(t *testing.T) {
t.Parallel()
match := anchoredIdentifierRegexp.MatchString(tc.input)
match := anchoredIdentifierRegexp().MatchString(tc.input)
if match != tc.match {
t.Errorf("Expected match=%t, got %t", tc.match, match)
}
Expand Down

0 comments on commit 6d47b18

Please sign in to comment.