-
Notifications
You must be signed in to change notification settings - Fork 362
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
example hook: dataset metadata validation (#7752)
- Loading branch information
Showing
9 changed files
with
317 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
--[[ | ||
Validate the existence of mandatory metadata describing a dataset. | ||
A metadata file should exist either in the same directory as the modified dataset, or in any parent directory. | ||
The closest metadata file would take precedence (i.e. same folder > parent > 2nd parent). | ||
# Example hook definition (_lakefs_actions/validate_dataset_fields.yaml): | ||
name: Validate Dataset Fields | ||
description: Validate the existence of mandatory metadata describing a dataset. | ||
on: | ||
pre-merge: | ||
branches: | ||
- main | ||
hooks: | ||
- id: validate_datasets | ||
type: lua | ||
properties: | ||
script_path: scripts/dataset_validator.lua | ||
args: | ||
prefix: 'datasets/' | ||
metadata_file_name: dataset_metadata.yaml | ||
fields: | ||
- name: contains_pii | ||
required: true | ||
type: boolean | ||
- name: approval_link | ||
required: true | ||
type: string | ||
match_pattern: 'https?:\/\/.*' | ||
- name: rank | ||
required: true | ||
type: number | ||
- name: department | ||
type: string | ||
choices: ['hr', 'it', 'other'] | ||
]] | ||
|
||
path = require("path") | ||
regexp = require("regexp") | ||
yaml = require("encoding/yaml") | ||
|
||
lakefs = require("lakefs") | ||
hook = require("hook") | ||
|
||
--- Report whether `value` appears among the allowed `choices`.
-- Entries are visited in sequence order and compared with `==`.
-- @param choices sequence (array-like table) of allowed values
-- @param value the value to look for
-- @return true when some entry equals `value`, false otherwise
function is_a_valid_choice(choices, value)
    local found = false
    for _, candidate in ipairs(choices) do
        if candidate == value then
            found = true
            break
        end
    end
    return found
end
|
||
--- Validate one metadata value against its field descriptor.
-- Any violation is reported through hook.fail, with the message prefixed by `filename`.
-- Checks run in this order: required, type, choices, match_pattern.
-- A field that is absent (nil) and not required is accepted without further checks,
-- so optional fields may be left out of a metadata file.
-- @param field_descriptor table with `name` and optional `required`, `type`,
--        `choices` and `match_pattern` entries
-- @param value the value read from the metadata file (nil when absent)
-- @param filename name used to prefix failure messages
function check_field(field_descriptor, value, filename)
    local name = field_descriptor.name

    -- check required but missing
    if value == nil then
        if field_descriptor.required then
            hook.fail(filename .. ": field '" .. name .. "' is required but no value given")
        end
        -- optional field that was left out: type/choices/pattern checks do not apply
        return
    end

    -- check type is correct
    if field_descriptor.type ~= nil and type(value) ~= field_descriptor.type then
        hook.fail(filename .. ": field '" .. name .. "' should be of type " .. field_descriptor.type)
    end

    -- check choices
    if field_descriptor.choices ~= nil and not is_a_valid_choice(field_descriptor.choices, value) then
        hook.fail(filename .. ": field '" .. name .. "' should be one of '" .. table.concat(field_descriptor.choices, ", ") .. "'")
    end

    -- check pattern (only text can be matched against a pattern)
    local pattern = field_descriptor.match_pattern
    if pattern ~= nil then
        if type(value) ~= "string" then
            hook.fail(filename .. ": field " .. name .. " should be text (got '" .. type(value) .. "') and match pattern '" .. pattern .. "'")
        elseif not regexp.match(pattern, value) then
            hook.fail(filename .. ": field " .. name .. " should match pattern '" .. pattern .. "'")
        end
    end
end
|
||
|
||
-- main flow
-- Page through lakefs.diff_refs (action.branch_id vs. action.source_ref, limited to args.prefix).
-- For every object reported as "added", walk up its parent directories looking for the
-- nearest metadata file (args.metadata_file_name); fail the hook when none is found.
after = ""
has_more = true
-- Cache keyed by metadata file path:
--   parsed YAML value -> the file exists and was fetched
--   false             -> a lookup returned 404, so it is not fetched again
metadata_files = {}
while has_more do
    local code, resp = lakefs.diff_refs(action.repository_id, action.branch_id, action.source_ref, after, args.prefix)
    if code ~= 200 then
        error("could not diff: " .. resp.message)
    end
    for _, result in pairs(resp.results) do
        print("" .. result.type .. " " .. result.path)
        if result.type == "added" then
            local current = result.path
            local descriptor_for_file = ""

            -- find nearest metadata file
            while true do
                local parsed = path.parse(current)
                if not parsed.parent or parsed.parent == "" then
                    -- reached the top without finding a metadata file
                    break
                end
                local current_descriptor = path.join("/", parsed.parent, args.metadata_file_name)
                local cached = metadata_files[current_descriptor]
                if cached then
                    -- cache hit: remember which metadata file covers this object
                    descriptor_for_file = current_descriptor
                    break
                elseif cached == nil then
                    -- cache miss: attempt to fetch it
                    local get_code, body = lakefs.get_object(action.repository_id, action.source_ref, current_descriptor)
                    if get_code == 200 then
                        metadata_files[current_descriptor] = yaml.unmarshal(body)
                        descriptor_for_file = current_descriptor
                        break
                    elseif get_code ~= 404 then
                        error("failed to look up metadata file: '" .. current_descriptor .. "', HTTP " .. tostring(get_code))
                    else
                        -- indicates this doesn't exist, no need to look it up again
                        metadata_files[current_descriptor] = false
                    end
                end
                -- cached == false falls through to here: known to be missing, try one level up
                current = parsed.parent
            end

            -- check if we found a descriptor
            if descriptor_for_file == "" then
                hook.fail("No dataset metadata found for file: " .. result.path)
            end
        end
    end
    -- pagination
    has_more = resp.pagination.has_more
    after = resp.pagination.next_offset
end
|
||
-- now let's review all the metadata files for this commit:
for metadata_filename, metadata_file in pairs(metadata_files) do
    -- metadata_files also holds `false` markers for paths where the lookup returned 404;
    -- indexing those would raise "attempt to index a boolean value", so only validate real files
    if metadata_file then
        for _, field_descriptor in ipairs(args.fields) do
            check_field(field_descriptor, metadata_file[field_descriptor.name], metadata_filename)
        end
    end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package hook | ||
|
||
import ( | ||
"strings" | ||
|
||
"github.com/Shopify/go-lua" | ||
) | ||
|
||
// helpers for writing lua actions | ||
|
||
// ErrHookFailure is the error produced when a hook fails explicitly,
// as distinct from a generic error that occurred while the script was executing.
// Its underlying string is the failure message.
type ErrHookFailure string

// Error implements the error interface by returning the failure message unchanged.
func (failure ErrHookFailure) Error() string {
	msg := string(failure)
	return msg
}
|
||
func Open(l *lua.State) { | ||
open := func(l *lua.State) int { | ||
lua.NewLibrary(l, library) | ||
return 1 | ||
} | ||
lua.Require(l, "hook", open, false) | ||
l.Pop(1) | ||
} | ||
|
||
var library = []lua.RegistryFunction{ | ||
{Name: "fail", Function: fail}, | ||
} | ||
|
||
func fail(l *lua.State) int { | ||
p := lua.CheckString(l, 1) | ||
lua.Errorf(l, "<HookFailure>%s</HookFailure>", p) | ||
panic("unreachable") | ||
} | ||
|
||
func Unwrap(err error) error { | ||
switch err.(type) { | ||
case lua.RuntimeError, *lua.RuntimeError: | ||
str := err.Error() | ||
_, after, found := strings.Cut(str, "<HookFailure>") | ||
if found { | ||
before, _, _ := strings.Cut(after, "</HookFailure>") | ||
return ErrHookFailure(before) | ||
} | ||
} | ||
return err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package hook_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/treeverse/lakefs/pkg/actions/lua/hook" | ||
|
||
"github.com/Shopify/go-lua" | ||
) | ||
|
||
const scriptWithExplicitFailure = ` | ||
hook = require("hook") | ||
hook.fail("this hook shall not pass") | ||
` | ||
|
||
const scriptWithExplicitError = ` | ||
error("oh no") | ||
` | ||
|
||
const scriptWithSyntaxError = ` | ||
local a = 15 | ||
a += "a" | ||
` | ||
|
||
func TestUnwrap(t *testing.T) { | ||
t.Run("explicit fail", func(t *testing.T) { | ||
l := lua.NewState() | ||
lua.OpenLibraries(l) | ||
hook.Open(l) | ||
err := lua.DoString(l, scriptWithExplicitFailure) | ||
if err == nil { | ||
t.Error("expected error but got none!") | ||
} | ||
before := err | ||
after := hook.Unwrap(before) | ||
if after.Error() != "this hook shall not pass" { | ||
t.Errorf("could not unwrap lua hook error, got %s", after.Error()) | ||
} | ||
}) | ||
t.Run("regular error", func(t *testing.T) { | ||
l := lua.NewState() | ||
lua.OpenLibraries(l) | ||
hook.Open(l) | ||
err := lua.DoString(l, scriptWithExplicitError) | ||
if err == nil { | ||
t.Error("expected error but got none!") | ||
} | ||
before := err | ||
after := hook.Unwrap(err) | ||
if after.Error() != before.Error() { | ||
t.Error("unwrapping things not returned by hook.fail should not change the error") | ||
} | ||
}) | ||
t.Run("syntax error", func(t *testing.T) { | ||
l := lua.NewState() | ||
lua.OpenLibraries(l) | ||
hook.Open(l) | ||
err := lua.DoString(l, scriptWithSyntaxError) | ||
if err == nil { | ||
t.Error("expected error but got none!") | ||
} | ||
before := err | ||
after := hook.Unwrap(err) | ||
if after.Error() != before.Error() { | ||
t.Error("unwrapping things not returned by hook.fail should not change the error") | ||
} | ||
}) | ||
t.Run("nil error", func(t *testing.T) { | ||
after := hook.Unwrap(nil) | ||
if after != nil { | ||
t.Error("unwrapping nil should return nil") | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.