Skip to content

Commit

Permalink
PEX: Return capture group for matched patterns (#3526)
Browse files Browse the repository at this point in the history
  • Loading branch information
reinkrul authored Oct 31, 2024
1 parent 12e4cb4 commit 8d74a2a
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 27 deletions.
29 changes: 29 additions & 0 deletions docs/pages/deployment/pex.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,32 @@ Writer of policies should take into consideration:
- fields that are intended to be used for logging or authorization decisions should have a distinct identifier.
- claims ideally map a registered claim name (e.g. `IANA JWT claims <https://www.iana.org/assignments/jwt/jwt.xhtml#claims>`_)
- overwriting properties already defined in the token introspection endpoint response is forbidden. These are: ``iss``, ``sub``, ``exp``, ``iat``, ``active``, ``client_id``, ``scope``.

Extracting substrings with regular expressions
==============================================
If you want introspection to return part of a string, you can use the ``pattern`` regular expression filter in the field definition with a capture group.
Token introspection will return the value of the capture group in the regular expression, instead of the whole field value.
For instance, to extract the level from the value ``"Admin level 4"`` of the ``role`` property in the following credential:

.. code-block:: json
{
"credentialSubject": {
"role": "Admin level 4"
}
}
You can define the following field in the input descriptor constraint, to have the level returned in the introspection response as ``admin_level``:

.. code-block:: json
{
"id": "admin_level",
"path": ["$.credentialSubject.role"],
"filter": {
"type": "string",
"pattern": "Admin level ([0-9])"
}
}
Only one capture group is supported per regular expression. If the pattern defines more than one capture group, an error is returned.
60 changes: 40 additions & 20 deletions vcr/pe/presentation_definition.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,12 +435,12 @@ func matchField(field Field, credential map[string]interface{}) (bool, interface
}

// if filter at path matches return true
match, err := matchFilter(*field.Filter, value)
match, matchedValue, err := matchFilter(*field.Filter, value)
if err != nil {
return false, nil, err
}
if match {
return true, value, nil
return true, matchedValue, nil
}
// if filter at path does not match continue and set optionalInvalid
optionalInvalid++
Expand All @@ -466,77 +466,97 @@ func getValueAtPath(path string, vcAsInterface interface{}) (interface{}, error)
return value, err
}

// matchFilter matches the value against the filter.
// matchFilter matches the value against the filter. It returns true if the value matches the filter, along with the matched value.
// A filter is a JSON Schema descriptor (https://json-schema.org/draft/2020-12/json-schema-validation.html#name-a-vocabulary-for-structural)
// Supported schema types: string, number, boolean, array, enum.
// Supported schema properties: const, enum, pattern. These only work for strings.
// Supported go value types: string, float64, int, bool and array.
// 'null' values are not supported.
// It returns an error on unsupported features or when the regex pattern fails.
func matchFilter(filter Filter, value interface{}) (bool, error) {
// It returns an error when;
// - an unsupported feature is used
// - the regex pattern fails
// - the regex pattern contains more than 1 capture group
func matchFilter(filter Filter, value interface{}) (bool, interface{}, error) {
// first we check if it's an enum, so we can recursively call matchFilter for each value
if filter.Enum != nil {
for _, enum := range filter.Enum {
f := Filter{
Type: "string",
Const: &enum,
}
match, _ := matchFilter(f, value)
match, result, _ := matchFilter(f, value)
if match {
return true, nil
return true, result, nil
}
}
return false, nil
return false, nil, nil
}

switch typedValue := value.(type) {
case string:
if filter.Type != "string" {
return false, nil
return false, nil, nil
}
case float64:
if filter.Type != "number" {
return false, nil
return false, nil, nil
}
case int:
if filter.Type != "number" {
return false, nil
return false, nil, nil
}
case bool:
if filter.Type != "boolean" {
return false, nil
return false, nil, nil
}
case []interface{}:
for _, v := range typedValue {
match, err := matchFilter(filter, v)
match, _, err := matchFilter(filter, v)
if err != nil {
return false, err
return false, nil, err
}
if match {
return true, nil
return true, value, nil
}
}
default:
// object not supported for now
return false, ErrUnsupportedFilter
return false, nil, ErrUnsupportedFilter
}

if filter.Const != nil {
if value != *filter.Const {
return false, nil
return false, nil, nil
}
}

if filter.Pattern != nil && filter.Type == "string" {
re, err := regexp2.Compile(*filter.Pattern, regexp2.ECMAScript)
if err != nil {
return false, err
return false, nil, err
}
match, err := re.FindStringMatch(value.(string))
if err != nil {
return false, nil, err
}
if match == nil {
return false, nil, nil
}
// We support returning a single capture group;
// - If there's a capture group, return it
// - If there's no capture group, return the whole match
// - If there's multiple capture groups, return an error
if len(match.Groups()) == 1 {
return true, string(match.Capture.Runes()), nil
} else if len(match.Groups()) == 2 {
return true, string(match.Groups()[1].Runes()), nil
} else {
return false, nil, errors.New("can't return results from multiple regex capture groups")
}
return re.MatchString(value.(string))
}

// if we get here, no pattern, enum or const is requested just the type.
return true, nil
return true, value, nil
}

// deduplicate removes duplicate VCs from the slice.
Expand Down
51 changes: 44 additions & 7 deletions vcr/pe/presentation_definition_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"embed"
"encoding/json"
"github.com/nuts-foundation/go-did/did"
"github.com/nuts-foundation/nuts-node/core/to"
vcrTest "github.com/nuts-foundation/nuts-node/vcr/test"
"strings"
"testing"
Expand Down Expand Up @@ -761,9 +762,12 @@ func Test_matchFilter(t *testing.T) {

for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
got, err := matchFilter(testCase.filter, testCase.value)
matches, matchedValue, err := matchFilter(testCase.filter, testCase.value)
require.NoError(t, err)
assert.Equal(t, testCase.want, got)
assert.Equal(t, testCase.want, matches)
if testCase.want && matches {
assert.Equal(t, testCase.value, matchedValue)
}
})
}
})
Expand All @@ -775,13 +779,46 @@ func Test_matchFilter(t *testing.T) {
filters := []Filter{f1, f2, f3}
t.Run("ok", func(t *testing.T) {
for _, filter := range filters {
match, err := matchFilter(filter, stringValue)
match, matchedValue, err := matchFilter(filter, stringValue)
require.NoError(t, err)
assert.True(t, match)
assert.Equal(t, stringValue, matchedValue)
}
})
t.Run("pattern", func(t *testing.T) {
t.Run("no match", func(t *testing.T) {
match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("[0-9]+")}, "value")
require.NoError(t, err)
assert.Nil(t, value)
assert.False(t, match)
})
t.Run("capture group", func(t *testing.T) {
match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("v([a-z]+)e")}, "value")
require.NoError(t, err)
assert.Equal(t, "alu", value)
assert.True(t, match)
})
t.Run("no capture group", func(t *testing.T) {
match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("value")}, "value")
require.NoError(t, err)
assert.Equal(t, "value", value)
assert.True(t, match)
})
t.Run("non-capturing group", func(t *testing.T) {
match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("(?:val)ue")}, "value")
require.NoError(t, err)
assert.Equal(t, "value", value)
assert.True(t, match)
})
t.Run("too many capture groups", func(t *testing.T) {
match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("(v)(a)lue")}, "value")
require.EqualError(t, err, "can't return results from multiple regex capture groups")
assert.False(t, match)
assert.Nil(t, value)
})
})
t.Run("enum value not found", func(t *testing.T) {
match, err := matchFilter(f2, "foo")
match, _, err := matchFilter(f2, "foo")
require.NoError(t, err)
assert.False(t, match)
})
Expand All @@ -790,17 +827,17 @@ func Test_matchFilter(t *testing.T) {
t.Run("error cases", func(t *testing.T) {
t.Run("enum with wrong type", func(t *testing.T) {
f := Filter{Type: "object"}
match, err := matchFilter(f, struct{}{})
match, _, err := matchFilter(f, struct{}{})
assert.False(t, match)
assert.Equal(t, err, ErrUnsupportedFilter)
})
t.Run("incorrect regex", func(t *testing.T) {
pattern := "["
f := Filter{Type: "string", Pattern: &pattern}
match, err := matchFilter(f, stringValue)
match, _, err := matchFilter(f, stringValue)
assert.False(t, match)
assert.Error(t, err, "error parsing regexp: missing closing ]: `[`")
match, err = matchFilter(f, []interface{}{stringValue})
match, _, err = matchFilter(f, []interface{}{stringValue})
assert.False(t, match)
assert.Error(t, err, "error parsing regexp: missing closing ]: `[`")
})
Expand Down

0 comments on commit 8d74a2a

Please sign in to comment.