diff --git a/docs/pages/deployment/pex.rst b/docs/pages/deployment/pex.rst index ba2b2314a..4efa271d5 100644 --- a/docs/pages/deployment/pex.rst +++ b/docs/pages/deployment/pex.rst @@ -95,3 +95,32 @@ Writer of policies should take into consideration: - fields that are intended to be used for logging or authorization decisions should have a distinct identifier. - claims ideally map a registered claim name (e.g. `IANA JWT claims `_) - overwriting properties already defined in the token introspection endpoint response is forbidden. These are: ``iss``, ``sub``, ``exp``, ``iat``, ``active``, ``client_id``, ``scope``. + +Extracting substrings with regular expressions +============================================== +If you want introspection to return part of a string, you can use the ``pattern`` regular expression filter in the field definition with a capture group. +Token introspection will return the value of the capture group in the regular expression, instead of the whole field value. +For instance, if you want to extract the level from the string ``"Admin level 4"`` from the following credential: + +.. code-block:: json + + { + "credentialSubject": { + "role": "Admin level 4" + } + } + +You can define the following field in the input descriptor constraint, to have the level returned in the introspection response as ``admin_level``: + +.. code-block:: json + + { + "id": "admin_level", + "path": ["$.credentialSubject.role"], + "filter": { + "type": "string", + "pattern": "Admin level ([0-9])" + } + } + +Only 1 capture group is supported in regular expressions. If multiple capture groups are defined, an error will be returned. 
\ No newline at end of file diff --git a/vcr/pe/presentation_definition.go b/vcr/pe/presentation_definition.go index b3b27bfe3..df41b6907 100644 --- a/vcr/pe/presentation_definition.go +++ b/vcr/pe/presentation_definition.go @@ -435,12 +435,12 @@ func matchField(field Field, credential map[string]interface{}) (bool, interface } // if filter at path matches return true - match, err := matchFilter(*field.Filter, value) + match, matchedValue, err := matchFilter(*field.Filter, value) if err != nil { return false, nil, err } if match { - return true, value, nil + return true, matchedValue, nil } // if filter at path does not match continue and set optionalInvalid optionalInvalid++ @@ -466,14 +466,17 @@ func getValueAtPath(path string, vcAsInterface interface{}) (interface{}, error) return value, err } -// matchFilter matches the value against the filter. +// matchFilter matches the value against the filter. It returns true if the value matches the filter, along with the matched value. // A filter is a JSON Schema descriptor (https://json-schema.org/draft/2020-12/json-schema-validation.html#name-a-vocabulary-for-structural) // Supported schema types: string, number, boolean, array, enum. // Supported schema properties: const, enum, pattern. These only work for strings. // Supported go value types: string, float64, int, bool and array. // 'null' values are not supported. -// It returns an error on unsupported features or when the regex pattern fails. 
-func matchFilter(filter Filter, value interface{}) (bool, error) { +// It returns an error when; +// - an unsupported feature is used +// - the regex pattern fails +// - the regex pattern contains more than 1 capture group +func matchFilter(filter Filter, value interface{}) (bool, interface{}, error) { // first we check if it's an enum, so we can recursively call matchFilter for each value if filter.Enum != nil { for _, enum := range filter.Enum { @@ -481,62 +484,79 @@ func matchFilter(filter Filter, value interface{}) (bool, error) { Type: "string", Const: &enum, } - match, _ := matchFilter(f, value) + match, result, _ := matchFilter(f, value) if match { - return true, nil + return true, result, nil } } - return false, nil + return false, nil, nil } switch typedValue := value.(type) { case string: if filter.Type != "string" { - return false, nil + return false, nil, nil } case float64: if filter.Type != "number" { - return false, nil + return false, nil, nil } case int: if filter.Type != "number" { - return false, nil + return false, nil, nil } case bool: if filter.Type != "boolean" { - return false, nil + return false, nil, nil } case []interface{}: for _, v := range typedValue { - match, err := matchFilter(filter, v) + match, _, err := matchFilter(filter, v) if err != nil { - return false, err + return false, nil, err } if match { - return true, nil + return true, value, nil } } default: // object not supported for now - return false, ErrUnsupportedFilter + return false, nil, ErrUnsupportedFilter } if filter.Const != nil { if value != *filter.Const { - return false, nil + return false, nil, nil } } if filter.Pattern != nil && filter.Type == "string" { re, err := regexp2.Compile(*filter.Pattern, regexp2.ECMAScript) if err != nil { - return false, err + return false, nil, err + } + match, err := re.FindStringMatch(value.(string)) + if err != nil { + return false, nil, err + } + if match == nil { + return false, nil, nil + } + // We support returning a single 
capture group; + // - If there's a capture group, return it + // - If there's no capture group, return the whole match + // - If there's multiple capture groups, return an error + if len(match.Groups()) == 1 { + return true, string(match.Capture.Runes()), nil + } else if len(match.Groups()) == 2 { + return true, string(match.Groups()[1].Runes()), nil + } else { + return false, nil, errors.New("can't return results from multiple regex capture groups") } - return re.MatchString(value.(string)) } // if we get here, no pattern, enum or const is requested just the type. - return true, nil + return true, value, nil } // deduplicate removes duplicate VCs from the slice. diff --git a/vcr/pe/presentation_definition_test.go b/vcr/pe/presentation_definition_test.go index b1570239e..902ad2dad 100644 --- a/vcr/pe/presentation_definition_test.go +++ b/vcr/pe/presentation_definition_test.go @@ -25,6 +25,7 @@ import ( "embed" "encoding/json" "github.com/nuts-foundation/go-did/did" + "github.com/nuts-foundation/nuts-node/core/to" vcrTest "github.com/nuts-foundation/nuts-node/vcr/test" "strings" "testing" @@ -761,9 +762,12 @@ func Test_matchFilter(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { - got, err := matchFilter(testCase.filter, testCase.value) + matches, matchedValue, err := matchFilter(testCase.filter, testCase.value) require.NoError(t, err) - assert.Equal(t, testCase.want, got) + assert.Equal(t, testCase.want, matches) + if testCase.want && matches { + assert.Equal(t, testCase.value, matchedValue) + } }) } }) @@ -775,13 +779,46 @@ func Test_matchFilter(t *testing.T) { filters := []Filter{f1, f2, f3} t.Run("ok", func(t *testing.T) { for _, filter := range filters { - match, err := matchFilter(filter, stringValue) + match, matchedValue, err := matchFilter(filter, stringValue) require.NoError(t, err) assert.True(t, match) + assert.Equal(t, stringValue, matchedValue) } }) + t.Run("pattern", func(t *testing.T) { + t.Run("no 
match", func(t *testing.T) { + match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("[0-9]+")}, "value") + require.NoError(t, err) + assert.Nil(t, value) + assert.False(t, match) + }) + t.Run("capture group", func(t *testing.T) { + match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("v([a-z]+)e")}, "value") + require.NoError(t, err) + assert.Equal(t, "alu", value) + assert.True(t, match) + }) + t.Run("no capture group", func(t *testing.T) { + match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("value")}, "value") + require.NoError(t, err) + assert.Equal(t, "value", value) + assert.True(t, match) + }) + t.Run("non-capturing group", func(t *testing.T) { + match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("(?:val)ue")}, "value") + require.NoError(t, err) + assert.Equal(t, "value", value) + assert.True(t, match) + }) + t.Run("too many capture groups", func(t *testing.T) { + match, value, err := matchFilter(Filter{Type: "string", Pattern: to.Ptr("(v)(a)lue")}, "value") + require.EqualError(t, err, "can't return results from multiple regex capture groups") + assert.False(t, match) + assert.Nil(t, value) + }) + }) t.Run("enum value not found", func(t *testing.T) { - match, err := matchFilter(f2, "foo") + match, _, err := matchFilter(f2, "foo") require.NoError(t, err) assert.False(t, match) }) @@ -790,17 +827,17 @@ func Test_matchFilter(t *testing.T) { t.Run("error cases", func(t *testing.T) { t.Run("enum with wrong type", func(t *testing.T) { f := Filter{Type: "object"} - match, err := matchFilter(f, struct{}{}) + match, _, err := matchFilter(f, struct{}{}) assert.False(t, match) assert.Equal(t, err, ErrUnsupportedFilter) }) t.Run("incorrect regex", func(t *testing.T) { pattern := "[" f := Filter{Type: "string", Pattern: &pattern} - match, err := matchFilter(f, stringValue) + match, _, err := matchFilter(f, stringValue) assert.False(t, match) assert.Error(t, err, "error parsing regexp: 
missing closing ]: `[`") - match, err = matchFilter(f, []interface{}{stringValue}) + match, _, err = matchFilter(f, []interface{}{stringValue}) assert.False(t, match) assert.Error(t, err, "error parsing regexp: missing closing ]: `[`") })