From b9da32eef18ed0c4df3524807f5e63dbe38429eb Mon Sep 17 00:00:00 2001 From: Gavin Date: Thu, 16 May 2024 16:22:11 -0700 Subject: [PATCH] Restrict filename and format strings ... to a reasonable character set. This avoids characters that may cause issues in mac, linux, and windows --- README.md | 10 ++-- RELEASE_NOTES.md | 3 ++ core/values/values.go | 42 ++++++++++++++-- core/values/values_test.go | 29 +++++++++-- service/integration_test.go | 99 +++++++++++++++++++++++++++++-------- 5 files changed, 151 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index bb13968..a0bf3fc 100644 --- a/README.md +++ b/README.md @@ -150,8 +150,10 @@ curl -H "Authorization: OAuth $KBASE_TOKEN" -T mylittlefile "http:///node?filename=mylittlefile&format=text" ``` -`filename` can be at most 256 characters with no control characters. -`format` can be at most 100 characters with no control characters. +`filename` can be at most 256 characters consisting of only unicode alphanumerics, space, and +the characters `[ ] ( ) = . - _`. +`format` can be at most 100 characters consisting of only unicode alphanumerics and +the characters `- _`. ## Copy a node ``` @@ -265,8 +267,8 @@ The form **may** contain a part called `format` where the part contents are the file, equivalent to the `format` query parameter for the standard upload method and with the same restrictions. The `format` part **MUST** come before the `upload` part. -Any file name provided in the `Content-Disposition` header can be at most 256 characters with no -control characters. +Any file name provided in the `Content-Disposition` header has the same restrictions as the +filename parameter for the standard upload method. ### Curl example diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 15ee12a..38306c3 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -2,6 +2,9 @@ * Added the `del` param when downloading the file from a node. * The Blobstore will now look for auth tokens in cookies specified in the deployment configuration. +* The filename and format strings are now much more restrictive in regard to allowed contents. + See the API documenation for details. + * Any extant data is not affected and will be returned when requested as normal. # 0.1.3 diff --git a/core/values/values.go b/core/values/values.go index 6735e28..b7f243e 100644 --- a/core/values/values.go +++ b/core/values/values.go @@ -9,11 +9,23 @@ import ( ) const ( - maxFileNameSize = 256 - maxFormatSize = 100 + maxFileNameSize = 256 + maxFormatSize = 100 + allowedFileChars = "[]()=-_. " + allowedFormatChars = "_-" ) var md5regex = regexp.MustCompile("^[a-fA-F0-9]{32}$") +var fileCharsSet = createAllowedCharsSet(allowedFileChars) +var formatCharsSet = createAllowedCharsSet(allowedFormatChars) + +func createAllowedCharsSet(allowedSpecialChars string) map[rune]struct{} { + allowedCharsSet := make(map[rune]struct{}) + for _, char := range allowedSpecialChars { + allowedCharsSet[char] = struct{}{} + } + return allowedCharsSet +} // MD5 contains a valid MD5 string. type MD5 struct { @@ -53,7 +65,7 @@ type FileName struct { // NewFileName creates a new filename. func NewFileName(name string) (*FileName, error) { - name, err := checkString(name, "File name", maxFileNameSize) + name, err := checkString(name, "File name", maxFileNameSize, fileCharsSet) if err != nil { return nil, err } @@ -72,7 +84,7 @@ type FileFormat struct { // NewFileFormat creates a new filename. func NewFileFormat(name string) (*FileFormat, error) { - name, err := checkString(name, "File format", maxFormatSize) + name, err := checkString(name, "File format", maxFormatSize, formatCharsSet) if err != nil { return nil, err } @@ -84,17 +96,37 @@ func (fn *FileFormat) GetFileFormat() string { return fn.fileFormat } -func checkString(s string, name string, maxSize int) (string, error) { +func checkString( + s string, name string, maxSize int, allowedSpcChars map[rune]struct{}, +) (string, error) { s = strings.TrimSpace(s) if len(s) > maxSize { return "", NewIllegalInputError(fmt.Sprintf("%s is > %d bytes", name, maxSize)) } + // check for control characters first to avoid returning / logging a string with control chars if containsControlChar(s) { return "", NewIllegalInputError(fmt.Sprintf("%s contains control characters", name)) } + for _, r := range(s) { + if !isAllowedChar(r, allowedSpcChars) { + return "", NewIllegalInputError( + fmt.Sprintf("%s string %s contains an illegal character: %q", name, s, r), + ) + } + } return s, nil } +func isAllowedChar(r rune, allowedSpcChars map[rune]struct{}) bool { + if unicode.IsLetter(r) || unicode.IsDigit(r) { + return true + } + if _, exists := allowedSpcChars[r]; exists { + return true + } + return false +} + func containsControlChar(s string) bool { for _, c := range s { if unicode.IsControl(c) { diff --git a/core/values/values_test.go b/core/values/values_test.go index 8105a1e..20dc6e4 100644 --- a/core/values/values_test.go +++ b/core/values/values_test.go @@ -51,7 +51,7 @@ func TestFileName(t *testing.T) { } func fileNameString() string { - s := "ab9 8&#']K[1*(&)ꢇ]" // 20 bytes + s := "ab9_8४.[]( )1=-ꢇ" // 20 bytes b := strings.Builder{} for i := 0; i < 12; i++ { @@ -72,18 +72,34 @@ func TestFileNameFail(t *testing.T) { assert.Nil(t, fn, "expected error") assert.Equal(t, NewIllegalInputError("File name contains control characters"), err, "incorrect error") + for _, ch := range []string{"*", ":", "?", "|", "✈", "🐱", "∞", "€", "◊"} { + fn, err = NewFileName("abc"+ch+"neg") + assert.Nil(t, fn, "expected error") + errstr := "File name string abc"+ch+"neg contains an illegal character: '"+ch+"'" + assert.Equal(t, NewIllegalInputError(errstr), err, "incorrect error") + } } func TestFileFormat(t *testing.T) { - fns := fileNameString()[:100] + fns := formatString() assert.Equal(t, 100, len(fns), "incorrect length") fn, err := NewFileFormat(" \t " + fns + " \t ") assert.Nil(t, err, "unexpected error") assert.Equal(t, fns, fn.GetFileFormat()) } +func formatString() string { + s := "ab9_8४K3Z23o1n-ꢇ" // 20 bytes + + b := strings.Builder{} + for i := 0; i < 5; i++ { + b.Write([]byte(s)) + } + return b.String() +} + func TestFileFormatFail(t *testing.T) { - fns := fileNameString()[:100] + fns := formatString() assert.Equal(t, 100, len(fns), "incorrect length") fn, err := NewFileFormat(fns + "a") assert.Nil(t, fn, "expected error") @@ -93,4 +109,11 @@ func TestFileFormatFail(t *testing.T) { assert.Nil(t, fn, "expected error") assert.Equal(t, NewIllegalInputError("File format contains control characters"), err, "incorrect error") + badchrs := []string{"*", ":", "?", "|", "✈", "🐱", "∞", "€", "◊", "=", "[", "]", "(", ")", "."} + for _, ch := range badchrs { + fn, err = NewFileFormat("abc"+ch+"neg") + assert.Nil(t, fn, "expected error") + errstr := "File format string abc"+ch+"neg contains an illegal character: '"+ch+"'" + assert.Equal(t, NewIllegalInputError(errstr), err, "incorrect error") + } } diff --git a/service/integration_test.go b/service/integration_test.go index 3b9cdca..616a890 100644 --- a/service/integration_test.go +++ b/service/integration_test.go @@ -72,9 +72,7 @@ func (t *TestSuite) SetupSuite() { t.mongo, t.mongoclient = t.setupMongo(tcfg) t.minio = t.setupMinio(tcfg) - auth, authurl := t.setupAuth(tcfg) - t.auth = auth - t.authurl = authurl + t.auth, t.authurl = t.setupAuth(tcfg) t.setUpUsersAndRoles() logrus.SetOutput(ioutil.Discard) @@ -91,7 +89,7 @@ func (t *TestSuite) SetupSuite() { S3AccessSecret: "sooporsecret", S3Region: "us-west-1", S3DisableSSL: true, - AuthURL: &authurl, + AuthURL: &t.authurl, AuthAdminRoles: &[]string{adminRole, blobstoreRole}, AuthTokenCookies: &[]string{"cookie1", "cookie2", "cookie3"}, }, @@ -543,8 +541,8 @@ func (t *TestSuite) TestAuthenticationMiddleWareFailBadCookie() { func (t *TestSuite) TestStoreAndGetWithFilename() { // check whitespace - body := t.req("POST", t.url+"/node?filename=%20%20myfile%20%20", - strings.NewReader("foobarbaz"), " OAuth "+t.noRole.token+" ", 380, 200) + body := t.req("POST", t.url+"/node?filename=%20%20my_fileթ%20%20", + strings.NewReader("foobarbaz"), " OAuth "+t.noRole.token+" ", 383, 200) t.checkLogs(logEvent{logrus.InfoLevel, "POST", "/node", 200, &t.noRole.user, "request complete", mtmap(), false}, @@ -565,7 +563,7 @@ func (t *TestSuite) TestStoreAndGetWithFilename() { "format": "", "file": map[string]interface{}{ "checksum": map[string]interface{}{"md5": "6df23dc03f9b54cc38a0fc1483df6e21"}, - "name": "myfile", + "name": "my_fileթ", "size": float64(9), }, }, @@ -583,26 +581,26 @@ func (t *TestSuite) TestStoreAndGetWithFilename() { "format": "", "file": map[string]interface{}{ "checksum": map[string]interface{}{"md5": "6df23dc03f9b54cc38a0fc1483df6e21"}, - "name": "myfile", + "name": "my_fileթ", "size": float64(9), }, }, "error": nil, "status": float64(200), } - t.checkNode(id, &t.noRole, 380, expected2) + t.checkNode(id, &t.noRole, 383, expected2) path1 := "/node/" + id path2 := path1 + "/" - t.checkFile(t.url+path1+"?download", path1, &t.noRole, 9, "myfile", []byte("foobarbaz")) - t.checkFile(t.url+path2+"?download", path2, &t.noRole, 9, "myfile", []byte("foobarbaz")) + t.checkFile(t.url+path1+"?download", path1, &t.noRole, 9, "my_fileթ", []byte("foobarbaz")) + t.checkFile(t.url+path2+"?download", path2, &t.noRole, 9, "my_fileթ", []byte("foobarbaz")) t.checkFile(t.url+path1+"?download_raw", path1, &t.noRole, 9, "", []byte("foobarbaz")) t.checkFile(t.url+path2+"?download_raw", path2, &t.noRole, 9, "", []byte("foobarbaz")) } func (t *TestSuite) TestStoreAndGetNodeAsAdminWithFormatAndTrailingSlashAndSeekAndLength() { - body := t.req("POST", t.url+"/node/?format=JSON", strings.NewReader("foobarbaz"), - "oauth "+t.noRole.token, 378, 200) + body := t.req("POST", t.url+"/node/?format=J-SONթ", strings.NewReader("foobarbaz"), + "oauth "+t.noRole.token, 381, 200) t.checkLogs(logEvent{logrus.InfoLevel, "POST", "/node/", 200, ptr("noroles"), "request complete", mtmap(), false}, ) @@ -617,7 +615,7 @@ func (t *TestSuite) TestStoreAndGetNodeAsAdminWithFormatAndTrailingSlashAndSeekA "created_on": time, "last_modified": time, "id": id, - "format": "JSON", + "format": "J-SONթ", "file": map[string]interface{}{ "checksum": map[string]interface{}{"md5": "6df23dc03f9b54cc38a0fc1483df6e21"}, "name": "", @@ -628,7 +626,7 @@ func (t *TestSuite) TestStoreAndGetNodeAsAdminWithFormatAndTrailingSlashAndSeekA "status": float64(200), } path := "/node/" + id - t.checkNode(id, &t.stdRole, 378, expected) + t.checkNode(id, &t.stdRole, 381, expected) dl := fmt.Sprintf("?download&seek=2&length=5") dlnolen := fmt.Sprintf("?download&seek=6") dlr := fmt.Sprintf("?download_raw&seek=7&length=100") @@ -637,7 +635,7 @@ func (t *TestSuite) TestStoreAndGetNodeAsAdminWithFormatAndTrailingSlashAndSeekA t.checkFile(t.url+path+dl, path, &t.stdRole, 5, id, []byte("obarb")) t.checkFile(t.url+path+dlnolen, path, &t.stdRole, 3, id, []byte("baz")) t.checkFile(t.url+path+dlr, path, &t.stdRole, 2, "", []byte("az")) - t.checkNode(id, &t.kBaseAdmin, 378, expected) + t.checkNode(id, &t.kBaseAdmin, 381, expected) t.checkFile(t.url+path+noopdl, path, &t.kBaseAdmin, 9, id, []byte("foobarbaz")) t.checkFile(t.url+path+dlrlen, path, &t.kBaseAdmin, 7, "", []byte("foobarb")) } @@ -660,9 +658,9 @@ func (t *TestSuite) testGetFileWithDelete(deleter User) { } func (t *TestSuite) TestStoreMIMEMultipartFilenameFormat() { - partsuffix := ` filename="myfile.txt"` - format := "gasbomb" - t.storeMIMEMultipart(partsuffix, &format, "myfile.txt", 392) + partsuffix := ` filename="my-file.txt"` + format := "gas_bomb" + t.storeMIMEMultipart(partsuffix, &format, "my-file.txt", 394) } func (t *TestSuite) TestStoreMIMEMultipartWhitespaceFileNameFormat() { @@ -768,6 +766,53 @@ func (t *TestSuite) storeMIMEMultipart( t.checkFile(t.url+path2+"?download_raw", path2, &t.noRole, 11, "", []byte("foobarbazba")) } + +func (t *TestSuite) TestGetNodeAndFileWithIllegalFileNameAndFormat() { + // In versions of the Blobstore before 0.1.4, filename and format strings were unconstrained + // other than they couldn't contain control characters. + // That data may still exist in older systems, so we need to test that the data can be + // returned, regardless of whether it meets the now stricter criteria for incoming data. + body := t.req("POST", t.url+"/node/", strings.NewReader("f"), + "oauth "+t.noRole.token, 374, 200) + t.checkLogs(logEvent{logrus.InfoLevel, "POST", "/node/", 200, &t.noRole.user, + "request complete", mtmap(), false}, + ) + + data := body["data"].(map[string]interface{}) + id := data["id"].(string) + + for _, ch := range []string{"*", "|", ":", "¥"} { + updatedoc := map[string]interface{}{ + "$set": map[string]string{ + "fname": "abc"+ch+"def", + "fmt": "uvw"+ch+"xyz", + }, + } + t.mongoclient.Database(testDB).Collection("nodes").UpdateOne( + nil, + map[string]string{"id": id}, + updatedoc, + ) + req, err := http.NewRequest(http.MethodGet, t.url+"/node/"+id, nil) + t.Nil(err, "unexpected error") + req.Header.Set("authorization", "oauth "+t.noRole.token) + resp, err := http.DefaultClient.Do(req) + t.Nil(err, "unexpected error") + b, err := ioutil.ReadAll(resp.Body) + t.Nil(err, "unexpected error") + var body map[string]interface{} + json.Unmarshal(b, &body) + data = body["data"].(map[string]interface{}) + t.Equal("uvw"+ch+"xyz", data["format"], "incorrect format") + file := data["file"].(map[string]interface{}) + t.Equal("abc"+ch+"def", file["name"], "incorrect filename") + t.loggerhook.Reset() + t.checkFile( + t.url+"/node/"+id+"?download", "/node/"+id, &t.noRole, 1, "abc"+ch+"def", []byte("f"), + ) + } +} + func (t *TestSuite) TestStoreMIMEMultipartFailContentLength() { // don't load MIME this way, sticks everything in memory for _, cl := range []string{"", "not a number", "-1"} { @@ -858,7 +903,7 @@ func (t *TestSuite) TestFormNodeFailEarlyEOFAfterFormat() { "--supahboundary\n"+ `Content-Disposition: form-data; name="format"`+"\n"+ "\n"+ - "format here\n", + "formathere\n", )) t.Nil(err, "unexpected error") req.Header.Set("Content-Type", "multipart/form-data; boundary=supahboundary") @@ -1112,6 +1157,13 @@ func (t *TestSuite) TestStoreBadFileName() { t.checkLogs(logEvent{logrus.ErrorLevel, "POST", "/node", 400, &t.noRole.user, "File name contains control characters", mtmap(), false}, ) + + body = t.req("POST", t.url+"/node?filename=foo☃bar", strings.NewReader("foobarbaz"), + "oauth "+t.noRole.token, 124, 400) + t.checkError(body, 400, "File name string foo☃bar contains an illegal character: '☃'") + t.checkLogs(logEvent{logrus.ErrorLevel, "POST", "/node", 400, &t.noRole.user, + "File name string foo☃bar contains an illegal character: '☃'", mtmap(), false}, +) } func (t *TestSuite) TestStoreBadFileFormat() { @@ -1121,6 +1173,13 @@ func (t *TestSuite) TestStoreBadFileFormat() { t.checkLogs(logEvent{logrus.ErrorLevel, "POST", "/node", 400, &t.noRole.user, "File format contains control characters", mtmap(), false}, ) + + body = t.req("POST", t.url+"/node?format=foo∑bar", strings.NewReader("foobarbaz"), + "oauth "+t.noRole.token, 126, 400) + t.checkError(body, 400, "File format string foo∑bar contains an illegal character: '∑'") + t.checkLogs(logEvent{logrus.ErrorLevel, "POST", "/node", 400, &t.noRole.user, + "File format string foo∑bar contains an illegal character: '∑'", mtmap(), false}, +) } func (t *TestSuite) TestGetNodeBadID() {