Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(gnoweb): simplify url parsing system #3366

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion gno.land/pkg/gnoweb/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ func TestRoutes(t *testing.T) {

for _, r := range routes {
t.Run(fmt.Sprintf("test route %s", r.route), func(t *testing.T) {
t.Logf("input: %q", r.route)
request := httptest.NewRequest(http.MethodGet, r.route, nil)
response := httptest.NewRecorder()
router.ServeHTTP(response, request)
Expand Down Expand Up @@ -125,7 +126,7 @@ func TestAnalytics(t *testing.T) {
request := httptest.NewRequest(http.MethodGet, route, nil)
response := httptest.NewRecorder()
router.ServeHTTP(response, request)
fmt.Println("HELLO:", response.Body.String())
gfanton marked this conversation as resolved.
Show resolved Hide resolved

assert.Contains(t, response.Body.String(), "sa.gno.services")
})
}
Expand All @@ -143,6 +144,7 @@ func TestAnalytics(t *testing.T) {
request := httptest.NewRequest(http.MethodGet, route, nil)
response := httptest.NewRecorder()
router.ServeHTTP(response, request)

assert.NotContains(t, response.Body.String(), "sa.gno.services")
})
}
Expand Down
14 changes: 4 additions & 10 deletions gno.land/pkg/gnoweb/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,7 @@ func (h *WebHandler) renderPackage(w io.Writer, gnourl *GnoURL) (status int, err
switch {
case gnourl.WebQuery.Has("source"):
return h.renderRealmSource(w, gnourl)
case kind == KindPure,
strings.HasSuffix(gnourl.Path, "/"),
isFile(gnourl.Path):
case kind == KindPure, gnourl.IsFile(), gnourl.IsDir():
i := strings.LastIndexByte(gnourl.Path, '/')
if i < 0 {
return http.StatusInternalServerError, fmt.Errorf("unable to get ending slash for %q", gnourl.Path)
Expand All @@ -152,10 +150,13 @@ func (h *WebHandler) renderPackage(w io.Writer, gnourl *GnoURL) (status int, err
gnourl.WebQuery.Set("source", "") // set source

file := gnourl.Path[i+1:]
// If there nothing after the last slash that mean its a
// directory ...
if file == "" {
return h.renderRealmDirectory(w, gnourl)
}

// ... else, remaining part is a file
gnourl.WebQuery.Set("file", file)
gnourl.Path = gnourl.Path[:i]

Expand Down Expand Up @@ -372,10 +373,3 @@ func generateBreadcrumbPaths(path string) []components.BreadcrumbPart {

return parts
}

// IsFile checks if the last element of the path is a file (has an extension)
func isFile(path string) bool {
base := filepath.Base(path)
ext := filepath.Ext(base)
return ext != ""
}
172 changes: 101 additions & 71 deletions gno.land/pkg/gnoweb/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@ import (
"errors"
"fmt"
"net/url"
"path/filepath"
"regexp"
"strings"
)

type PathKind byte

const (
KindInvalid PathKind = 0
KindUnknown PathKind = 0
thehowl marked this conversation as resolved.
Show resolved Hide resolved
KindRealm PathKind = 'r'
KindPure PathKind = 'p'
)

// rePkgOrRealmPath matches and validates a realm or package path.
var rePkgOrRealmPath = regexp.MustCompile(`^/[a-z][a-zA-Z0-9_/.]*$`)

// GnoURL decomposes the parts of an URL to query a realm.
type GnoURL struct {
// Example full path:
Expand All @@ -28,121 +32,147 @@ type GnoURL struct {
Query url.Values // c=d
}

func (url GnoURL) EncodeArgs() string {
var urlstr strings.Builder
if url.Args != "" {
urlstr.WriteString(url.Args)
}
// EncodeFlag is used to compose and encode URL components.
type EncodeFlag int

if len(url.Query) > 0 {
urlstr.WriteString("?" + url.Query.Encode())
}
const (
EncodePath EncodeFlag = 1 << iota
EncodeArgs
EncodeWebQuery
EncodeQuery
EncodeNoEscape // Disable escaping on arg
)

return urlstr.String()
// Has checks if the EncodeFlag contains all the specified flags.
func (f EncodeFlag) Has(flags EncodeFlag) bool {
return f&flags != 0
}

func (url GnoURL) EncodePath() string {
// Encode encodes the URL components based on the provided flags.
// Encode assums the URL is valid.
gfanton marked this conversation as resolved.
Show resolved Hide resolved
func (gnoURL GnoURL) Encode(encodeFlags EncodeFlag) string {
thehowl marked this conversation as resolved.
Show resolved Hide resolved
gfanton marked this conversation as resolved.
Show resolved Hide resolved
var urlstr strings.Builder
urlstr.WriteString(url.Path)
if url.Args != "" {
urlstr.WriteString(":" + url.Args)
}

if len(url.Query) > 0 {
urlstr.WriteString("?" + url.Query.Encode())
if encodeFlags.Has(EncodePath) {
urlstr.WriteString(gnoURL.Path)
}

return urlstr.String()
}
if encodeFlags.Has(EncodeArgs) && gnoURL.Args != "" {
if encodeFlags.Has(EncodePath) {
urlstr.WriteRune(':')
}

func (url GnoURL) EncodeWebPath() string {
var urlstr strings.Builder
urlstr.WriteString(url.Path)
if url.Args != "" {
pathEscape := escapeDollarSign(url.Args)
urlstr.WriteString(":" + pathEscape)
// XXX: Arguments should ideally always be escaped,
// but this may require changes in some realms.
args := gnoURL.Args
if !encodeFlags.Has(EncodeNoEscape) {
args = escapeDollarSign(url.PathEscape(args))
gfanton marked this conversation as resolved.
Show resolved Hide resolved
}

urlstr.WriteString(args)
}

if len(url.WebQuery) > 0 {
urlstr.WriteString("$" + url.WebQuery.Encode())
if encodeFlags.Has(EncodeWebQuery) && len(gnoURL.WebQuery) > 0 {
urlstr.WriteRune('$')
urlstr.WriteString(gnoURL.WebQuery.Encode())
}

if len(url.Query) > 0 {
urlstr.WriteString("?" + url.Query.Encode())
if encodeFlags.Has(EncodeQuery) && len(gnoURL.Query) > 0 {
urlstr.WriteRune('?')
urlstr.WriteString(gnoURL.Query.Encode())
}

return urlstr.String()
}

func (url GnoURL) Kind() PathKind {
if len(url.Path) < 2 {
return KindInvalid
}
pk := PathKind(url.Path[1])
switch pk {
case KindPure, KindRealm:
return pk
// EncodeArgs encodes the arguments and query parameters into a string.
// This function is intended to be passed as a realm `Render` argument.
func (gnoURL GnoURL) EncodeArgs() string {
return gnoURL.Encode(EncodeArgs | EncodeQuery | EncodeNoEscape)
}

// EncodeURL encodes the path, arguments, and query parameters into a string.
// This function provides the full representation of the URL without the web query.
func (gnoURL GnoURL) EncodeURL() string {
return gnoURL.Encode(EncodePath | EncodeArgs | EncodeQuery)
}

// EncodeWebURL encodes the path, package arguments, web query, and query into a string.
// This function provides the full representation of the URL.
func (gnoURL GnoURL) EncodeWebURL() string {
return gnoURL.Encode(EncodePath | EncodeArgs | EncodeWebQuery | EncodeQuery)
}

// Kind determines the kind of path (invalid, realm, or pure) based on the path structure.
func (gnoURL GnoURL) Kind() PathKind {
if len(gnoURL.Path) > 2 && gnoURL.Path[0] == '/' && gnoURL.Path[2] == '/' {
switch k := PathKind(gnoURL.Path[1]); k {
case KindPure, KindRealm:
return k
}
}
return KindInvalid
return KindUnknown
}

var (
ErrURLMalformedPath = errors.New("malformed URL path")
ErrURLInvalidPathKind = errors.New("invalid path kind")
)
func (gnoURL GnoURL) IsValid() bool {
return rePkgOrRealmPath.MatchString(gnoURL.Path)
}

// reRealName match a realm path
// - matches[1]: path
// - matches[2]: path args
var reRealmPath = regexp.MustCompile(`^` +
`(/(?:[a-zA-Z0-9_-]+)/` + // path kind
`[a-zA-Z][a-zA-Z0-9_-]*` + // First path segment
`(?:/[a-zA-Z][.a-zA-Z0-9_-]*)*/?)` + // Additional path segments
`([:$](?:.*))?$`, // Remaining portions args, separate by `$` or `:`
)
// IsDir checks if the URL path represents a directory.
func (gnoURL GnoURL) IsDir() bool {
return len(gnoURL.Path) > 0 && gnoURL.Path[len(gnoURL.Path)-1] == '/'
}

// IsFile checks if the URL path represents a file.
func (gnoURL GnoURL) IsFile() bool {
return filepath.Ext(gnoURL.Path) != ""
}

var ErrURLInvalidPath = errors.New("invalid or malformed path")

// ParseGnoURL parses a URL into a GnoURL structure, extracting and validating its components.
func ParseGnoURL(u *url.URL) (*GnoURL, error) {
matches := reRealmPath.FindStringSubmatch(u.EscapedPath())
if len(matches) != 3 {
return nil, fmt.Errorf("%w: %s", ErrURLMalformedPath, u.Path)
var webargs string
path, args, found := strings.Cut(u.EscapedPath(), ":")
if found {
args, webargs, _ = strings.Cut(args, "$")
} else {
path, webargs, _ = strings.Cut(path, "$")
}

path := matches[1]
args := matches[2]
// NOTE: `PathUnescape` should already unescape dollar signs.
upath, err := url.PathUnescape(path)
if err != nil {
return nil, fmt.Errorf("unable to unescape path %q: %w", path, err)
}

if len(args) > 0 {
switch args[0] {
case ':':
args = args[1:]
case '$':
default:
return nil, fmt.Errorf("%w: %s", ErrURLMalformedPath, u.Path)
}
if !rePkgOrRealmPath.MatchString(upath) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if we used gnolang.IsPurePackagePath / gnolang.IsRealmPath instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To check the string path directly? Or do you mean removing the check and using IsPurePackagePath and IsRealmPath? I would like to have IsPurePackagePath and IsRealmPath; I think it's a better idea than Kind(), but I'm not sure why you pointed out this specific line.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just had beef with this regex and this seemed a good place where to make a comment :D

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But yeah, I think we can also substitute this regex to determine if it's a valid path?

Copy link
Member Author

@gfanton gfanton Dec 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should forbid a URL that is neither pure nor a realm. I prefer this package to perform minimal checks; it should be the caller's choice to accept the URL based on the parsed information. That said, we can still establish a minimal base to work on without assuming any urls.

What do you think about:

  • Removing the concept of "kind" within the structure while still providing helpers to check if the method is pure or a realm, as you mentioned.
  • Adding a new File field, trimming any file from the path when parsing and adding it to the structure.
  • Refining the regex to define what a path can be, based on what we have in gnovm/pkg/gnolang/helpers.go, with the only base constraint that it must start with a slash and at least one character. This will makes the check and the assumption of the path structure simple and easy to manage.
var rePkgOrRealmPath = regexp.MustCompile(`^/[a-z][a-z0-9_./]*$`)

Only pure and realm exist, but gnoweb will likely extend this with /u, for example. So it makes sense to me to make GnoURL a bit more flexible in what it can parse. This way, the caller can decide whether to use it as a direct call to the node or to redirect the user to something else.

Copy link
Member Author

@gfanton gfanton Dec 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@thehowl suggestion preview: gfanton@d0eb4b0

I've directly pushed the commit 8f93987 with suggested changes. If you think it's appropriate, feel free to merge it. Otherwise, please don't hesitate to give me feedback so I can make the necessary updates.

edit: i've made another commit (62b005a) to handle special file, such as LICSENSE

return nil, fmt.Errorf("%w: %q", ErrURLInvalidPath, upath)
}

var err error
webquery := url.Values{}
args, webargs, found := strings.Cut(args, "$")
if found {
if webquery, err = url.ParseQuery(webargs); err != nil {
return nil, fmt.Errorf("unable to parse webquery %q: %w ", webquery, err)
if len(webargs) > 0 {
var parseErr error
if webquery, parseErr = url.ParseQuery(webargs); parseErr != nil {
return nil, fmt.Errorf("unable to parse webquery %q: %w", webargs, parseErr)
}
}

uargs, err := url.PathUnescape(args)
if err != nil {
return nil, fmt.Errorf("unable to unescape path %q: %w", args, err)
return nil, fmt.Errorf("unable to unescape args %q: %w", args, err)
}

return &GnoURL{
Path: path,
Path: upath,
Args: uargs,
WebQuery: webquery,
Query: u.Query(),
Domain: u.Hostname(),
}, nil
}

// escapeDollarSign replaces dollar signs with their URL-encoded equivalent.
func escapeDollarSign(s string) string {
return strings.ReplaceAll(s, "$", "%24")
}
Loading
Loading