From bb10b0e074035609cd10e0b8d48401a666f4740b Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Thu, 31 Aug 2023 12:42:16 +0200 Subject: [PATCH 1/3] Move GTO docs here from mlem.ai --- content/docs/gto/command-reference/assign.md | 61 ++++ .../docs/gto/command-reference/check-ref.md | 36 +++ .../docs/gto/command-reference/deprecate.md | 141 ++++++++++ .../docs/gto/command-reference/describe.md | 52 ++++ content/docs/gto/command-reference/doctor.md | 36 +++ content/docs/gto/command-reference/history.md | 50 ++++ content/docs/gto/command-reference/index.md | 11 + .../docs/gto/command-reference/register.md | 63 +++++ content/docs/gto/command-reference/remove.md | 26 ++ content/docs/gto/command-reference/show.md | 157 +++++++++++ content/docs/gto/command-reference/stages.md | 24 ++ content/docs/gto/get-started.md | 168 +++++++++++ content/docs/gto/index.md | 41 +++ content/docs/gto/install.md | 31 ++ content/docs/gto/user-guide/index.md | 264 ++++++++++++++++++ content/docs/gto/why-gto.md | 76 +++++ content/docs/sidebar.json | 69 +++++ content/docs/studio/troubleshooting.md | 4 +- .../user-guide/model-registry/add-a-model.md | 2 +- .../user-guide/model-registry/assign-stage.md | 6 +- .../model-registry/register-version.md | 8 +- .../remove-a-model-or-its-details.md | 2 +- .../user-guide/model-registry/use-models.md | 4 +- .../model-registry/view-and-compare-models.md | 16 +- .../what-is-a-model-registry.md | 5 +- .../what-is-a-project.md | 4 +- content/docs/use-cases/model-registry.md | 3 - .../components/LayoutFooter/index.tsx | 7 - .../LayoutHeader/Nav/Popup/styles.module.css | 4 - .../gatsby-theme-iterative/data/menu.ts | 9 - .../data/styles.module.css | 4 - 31 files changed, 1324 insertions(+), 60 deletions(-) create mode 100644 content/docs/gto/command-reference/assign.md create mode 100644 content/docs/gto/command-reference/check-ref.md create mode 100644 content/docs/gto/command-reference/deprecate.md create mode 100644 
content/docs/gto/command-reference/describe.md create mode 100644 content/docs/gto/command-reference/doctor.md create mode 100644 content/docs/gto/command-reference/history.md create mode 100644 content/docs/gto/command-reference/index.md create mode 100644 content/docs/gto/command-reference/register.md create mode 100644 content/docs/gto/command-reference/remove.md create mode 100644 content/docs/gto/command-reference/show.md create mode 100644 content/docs/gto/command-reference/stages.md create mode 100644 content/docs/gto/get-started.md create mode 100644 content/docs/gto/index.md create mode 100644 content/docs/gto/install.md create mode 100644 content/docs/gto/user-guide/index.md create mode 100644 content/docs/gto/why-gto.md diff --git a/content/docs/gto/command-reference/assign.md b/content/docs/gto/command-reference/assign.md new file mode 100644 index 0000000000..784ff50b9f --- /dev/null +++ b/content/docs/gto/command-reference/assign.md @@ -0,0 +1,61 @@ +# assign + +Assign stage to specific artifact version. + +## Synopsis + +```usage +usage: gto assign [-r ] [--version ] + [--stage ] [-m ] + [--simple ] [--force] [--push] [--sr] + [-h] + name [ref] + +arguments: + name Artifact name + [ref] Git reference to use +``` + +## Description + +To assign an actionable stage for a specific artifact version use the same +`gto assign` command. Stages can mark the artifact readiness for a specific +consumer. You can plug in a real downstream system via CI/CD or web hooks, e.g. +to redeploy an ML model. + +```cli +$ gto assign awesome-model --version v0.0.1 --stage prod +Created git tag 'awesome-model#prod#1' that assigns stage to 'v0.0.1' +``` + +GTO creates a special Git tag in +[the standard format](/doc/gto/user-guide#git-tags-format). + +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] 
+- `--version ` - If you provide REF, this will be used to name new version +- `--stage ` - Stage to assign +- `-m `, `--message ` - Message to annotate the Git tag with +- `--simple ` - Use simple notation, e.g. `rf#prod` instead of `rf#prod-5` + [supported values: auto, true, false] [default: auto] +- `--force` - Create the Git tag even if it already exists and is in effect +- `--push` - Push created tag automatically (experimental) +- `--sr`, `--skip-registration` - Don't register a version at specified commit +- `-h`, `--help` - Show this message and exit. + +## Examples + +Assign artifact "nn" to "prod" at specific Git ref instead of supplying artifact +version (note that this will also register a version if it doesn't exist): + +```cli +$ gto assign nn abcd123 --stage prod +``` + +Assign stage at specific Git ref and name the version explicitly (this assumes +that version was not registered yet): + +```cli +$ gto assign nn abcd123 --version v1.0.0 --stage prod +``` diff --git a/content/docs/gto/command-reference/check-ref.md b/content/docs/gto/command-reference/check-ref.md new file mode 100644 index 0000000000..f6c721b932 --- /dev/null +++ b/content/docs/gto/command-reference/check-ref.md @@ -0,0 +1,36 @@ +# check-ref + +Find out the artifact version registered/assigned with ref. + +## Synopsis + +```usage +usage: gto check-ref [-r ] [--json] [--name] [--version] + [--event] [--stage] [-h] + ref + +arguments: + ref Git reference to analyze +``` + +## Description + +You can use `gto check-ref` to interpret a Git tag: + +```cli +$ gto check-ref -r build/example-gto churn#prod#3 +✅ Stage "prod" was assigned to version "v3.0.0" of artifact "churn" +``` + +For machine-consumable format, use `--json` flag or output specific pieces of +information with `--name`, `--version`, `--stage` or `--event`. + +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] 
+- `--json` - Print output in json format +- `--name` - Show artifact name +- `--version` - Output artifact version +- `--event` - Show event +- `--stage` - Show artifact stage +- `-h`, `--help` - Show this message and exit. diff --git a/content/docs/gto/command-reference/deprecate.md b/content/docs/gto/command-reference/deprecate.md new file mode 100644 index 0000000000..4729efe3fe --- /dev/null +++ b/content/docs/gto/command-reference/deprecate.md @@ -0,0 +1,141 @@ +# deprecate + +Deprecate artifact, deregister a version, or unassign a stage. + +## Synopsis + +```usage +usage: gto deprecate [-r ] [-m ] [--simple ] + [--force] [-d] [--push] [-h] + name [version] [stage] + +arguments: + name Artifact name + [version] Artifact version + [stage] Stage to unassign +``` + +## Description + +The command supports three use cases: + +```cli +# deprecate an artifact: +$ gto deprecate nn + +# deprecate a version: +$ gto deprecate nn v0.0.1 + +# unassign a stage: +$ gto deprecate nn v0.0.1 prod +``` + +### Unassigning a stage + +Sometimes you need to mark an artifact version no longer ready for a specific +consumer, and maybe signal a downstream system about this. You can use +`gto deprecate` for that: + +```cli +$ gto deprecate awesome-model v0.0.1 prod +Created git tag 'awesome-model#prod!#2' that unassigns a stage from 'v0.0.1' +``` + +
+ +### Unassigning a stage: some details and options + +GTO creates a special Git tag in +[the standard format](/doc/gto/user-guide#git-tags-format). + +Note that later you can create this stage again, if you need to, by calling +`$ gto assign` again. + +You also may want to delete the git tag instead of creating a new one. This is +useful if you don't want to keep extra tags in your Git repo, don't need history +and don't want to trigger a CI/CD or another downstream system. For that, you +can use: + +```cli +$ gto deprecate awesome-model v0.0.1 prod --delete +Deleted git tag 'awesome-model#prod#1' that assigned a stage to 'v0.0.1' +To push the changes upstream, run: +git push origin awesome-model#prod#1 --delete +``` + +
+ +### Deregister a version + +Sometimes you need to mark a specific artifact version as no longer ready for +usage. You could just delete a git tag, but if you want to preserve a history of +the actions, you may again use `gto deprecate`. + +```cli +$ gto deprecate awesome-model v0.0.1 +Created git tag 'awesome-model@v0.0.1!' that deregistered a version. +``` + +
+ +### Deregister a version: some details and options + +If you want to deregister the version by deleting the Git tags themselves, you could +use + +```cli +$ gto deprecate awesome-model v0.0.1 --delete +Deleted git tag 'awesome-model@v0.0.1' that registered a version. +Deleted git tag 'awesome-model#prod#1' that assigned a stage to 'v0.0.1'. +Deleted git tag 'awesome-model#prod!#2' that unassigned a stage to 'v0.0.1'. +To push the changes upstream, run: +git push origin awesome-model@v0.0.1 awesome-model#prod#1 awesome-model#prod!#2 --delete +``` + +This includes all Git tags related to the version: a tag that registered it and +all tags that assigned stages to it. + +
+ +### Deprecating an artifact + +Sometimes you need to mark the artifact as "deprecated", usually meaning +it's outdated and will no longer be developed. To do this, you could run: + +```cli +$ gto deprecate awesome-model +Created Git tag 'awesome-model@deprecated' that deprecates an artifact. +``` + +
+ +### Deprecating an artifact: some details and options + +With `awesome-model@deprecated` Git tag the artifact will be considered +deprecated until you register a new version or assign a new stage to it after +the deprecation. + +If you want to deprecate an artifact by deleting git tags, you'll need to delete +all of them for the artifact. You could do that with + +```cli +$ gto deprecate awesome-model --delete +Deleted git tag 'awesome-model@v0.0.1' that registered a version. +Deleted git tag 'awesome-model#prod#1' that assigned a stage to 'v0.0.1'. +Deleted git tag 'awesome-model#prod!#2' that unassigned a stage to 'v0.0.1'. +To push the changes upstream, run: +git push origin awesome-model@v0.0.1 awesome-model#prod#1 awesome-model#prod!#2 --delete +``` + +
+ +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] +- `-m `, `--message ` - Message to annotate the Git tag with +- `--simple ` - Use simple notation, e.g. `rf#prod` instead of `rf#prod-5` + [supported values: auto, true, false] [default: auto] +- `--force` - Create the Git tag even if it already exists and is in effect +- `-d`, `--delete` - Delete the git tag(s) instead of creating the new one +- `--push` - Push created tag automatically (experimental) +- `-h`, `--help` - Show this message and exit. diff --git a/content/docs/gto/command-reference/describe.md b/content/docs/gto/command-reference/describe.md new file mode 100644 index 0000000000..6fd02baa8b --- /dev/null +++ b/content/docs/gto/command-reference/describe.md @@ -0,0 +1,52 @@ +# describe + +Display enrichments for an artifact. + +## Synopsis + +```usage +usage: gto describe [-r ] [--rev ] [--type] [--path] + [--description] [-h] + name + +arguments: + name Artifact name +``` + +## Description + +To get details about an artifact (from `artifacts.yaml`) use `gto describe`: + +```cli +$ gto describe churn -r https://github.com/iterative/example-gto +{ + "type": "model", + "path": "models/churn.pkl", + "virtual": false +} +``` + +The output is in JSON format for ease of parsing programmatically. + +Note, that for local repos the `artifacts.yaml` is read from the workspace +without Git, so if you have uncommitted changes, they will be reflected in the +output. If you want to read from specific commit, you need to specify `--rev` +option. + +You can also get annotation for specific versions (these are the same shortcuts +as in `gto show`): + +```cli +$ gto describe churn@latest # highest version by SemVer +$ gto describe churn#dev # version in stage `dev` +$ gto describe churn@v3.0.0 # version `v3.0.0` +``` + +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] 
+- `--rev ` - Repo revision to use +- `--type` - Show type +- `--path` - Show path +- `--description` - Show description +- `-h`, `--help` - Show this message and exit. diff --git a/content/docs/gto/command-reference/doctor.md b/content/docs/gto/command-reference/doctor.md new file mode 100644 index 0000000000..4afc9636e2 --- /dev/null +++ b/content/docs/gto/command-reference/doctor.md @@ -0,0 +1,36 @@ +# doctor + +Display GTO version and check the registry for problems. + +## Synopsis + +```usage +usage: gto doctor [-r ] [-A] [-h] +``` + +## Description + +This will check the registry and print all the issues if found: + +```cli +$ gto doctor +🪴 GTO Version: 0.2.5 +--------------------------------- +INDEX='artifacts.yaml' +TYPES=None +STAGES=None +LOG_LEVEL='INFO' +DEBUG=False +ENRICHMENTS=[] +AUTOLOAD_ENRICHMENTS=True +CONFIG_FILE_NAME='.gto' +EMOJIS=True +--------------------------------- +✅ No issues found +``` + +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] +- `-A`, `--all-commits` - Read all commits +- `-h`, `--help` - Show this message and exit. diff --git a/content/docs/gto/command-reference/history.md b/content/docs/gto/command-reference/history.md new file mode 100644 index 0000000000..b05b87021a --- /dev/null +++ b/content/docs/gto/command-reference/history.md @@ -0,0 +1,50 @@ +# history + +Show a journal of registry operations. + +## Synopsis + +```usage +usage: gto history [-r ] [-a] [-A] [--json] [--plain] + [--asc] [-h] + [name] + +arguments: + [name] Artifact name to show. If empty, show all. +``` + +## Description + +This command prints a journal of the events that happened to an artifact. This +allows you to audit the changes. 
+ +```cli +$ gto history churn -r https://github.com/iterative/example-gto +╒═════════════════════╀════════════╀══════════════╀═══════════╀═════════╀══════════╀═════════════════╕ +β”‚ timestamp β”‚ artifact β”‚ event β”‚ version β”‚ stage β”‚ commit β”‚ ref β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•ͺ════════════β•ͺ══════════════β•ͺ═══════════β•ͺ═════════β•ͺ══════════β•ͺ═════════════════║ +β”‚ 2022-11-09 13:40:33 β”‚ churn β”‚ assignment β”‚ v3.1.1 β”‚ dev β”‚ 2f2a8de β”‚ churn#dev#5 β”‚ +β”‚ 2022-11-09 13:40:33 β”‚ churn β”‚ registration β”‚ v3.1.1 β”‚ - β”‚ 2f2a8de β”‚ churn@v3.1.1 β”‚ +β”‚ 2022-11-08 09:53:53 β”‚ churn β”‚ commit β”‚ v3.1.1 β”‚ - β”‚ 2f2a8de β”‚ 2f2a8de β”‚ +β”‚ 2022-11-07 06:07:13 β”‚ churn β”‚ assignment β”‚ v3.1.0 β”‚ dev β”‚ 064f173 β”‚ churn#dev#4 β”‚ +β”‚ 2022-11-06 02:20:33 β”‚ churn β”‚ assignment β”‚ v3.0.0 β”‚ prod β”‚ ddae695 β”‚ churn#prod#3 β”‚ +β”‚ 2022-11-04 22:33:53 β”‚ churn β”‚ assignment β”‚ v3.1.0 β”‚ staging β”‚ 064f173 β”‚ churn#staging#2 β”‚ +β”‚ 2022-11-03 18:47:13 β”‚ churn β”‚ assignment β”‚ v3.0.0 β”‚ dev β”‚ ddae695 β”‚ churn#dev#1 β”‚ +β”‚ 2022-11-02 15:00:33 β”‚ churn β”‚ registration β”‚ v3.1.0 β”‚ - β”‚ 064f173 β”‚ churn@v3.1.0 β”‚ +β”‚ 2022-11-01 11:13:53 β”‚ churn β”‚ commit β”‚ v3.1.0 β”‚ - β”‚ 064f173 β”‚ 064f173 β”‚ +β”‚ 2022-10-28 23:53:53 β”‚ churn β”‚ registration β”‚ v3.0.0 β”‚ - β”‚ ddae695 β”‚ churn@v3.0.0 β”‚ +β”‚ 2022-10-27 20:07:13 β”‚ churn β”‚ commit β”‚ v3.0.0 β”‚ - β”‚ ddae695 β”‚ ddae695 β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•› +``` + +Use `--all-branches` and `--all-commits` to read more than just HEAD. + +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] 
+- `-a`, `--all-branches` - Read heads from all branches +- `-A`, `--all-commits` - Read all commits +- `--json` - Print output in json format +- `--plain` - Print table in grep-able format +- `--ascending`, `--asc` - Show new first +- `-h`, `--help` - Show this message and exit. diff --git a/content/docs/gto/command-reference/index.md b/content/docs/gto/command-reference/index.md new file mode 100644 index 0000000000..201c589aba --- /dev/null +++ b/content/docs/gto/command-reference/index.md @@ -0,0 +1,11 @@ +# GTO Command Reference + +GTO is a command line tool. It works on top of Git. For a list of all commands, +type `gto -h`. + +## Typical GTO workflow + +- Register artifact versions with `gto register` +- Assign stages to them with `gto assign` +- Read the registry with `gto show` and `gto history` +- Read and interpret Git tag with `gto check-ref` diff --git a/content/docs/gto/command-reference/register.md b/content/docs/gto/command-reference/register.md new file mode 100644 index 0000000000..2911fffa52 --- /dev/null +++ b/content/docs/gto/command-reference/register.md @@ -0,0 +1,63 @@ +# register + +Create an artifact version to signify an important, published or released +iteration. + +## Synopsis + +```usage +usage: gto register [-r ] [--ver ] [-m ] + [--simple ] [--force] [--bump-major] + [--bump-minor] [--bump-patch] [--push] [-h] + name [ref] + +arguments: + name Artifact name + [ref] Git reference to use for registration [default: HEAD] +``` + +## Description + +Registering a version is usually done to mark significant changes to the +artifact. To release a new version (including the very first one), use +`gto register`. + +```cli +$ gto register awesome-model HEAD --version v0.0.1 +Created git tag 'awesome-model@v0.0.1' that registers a version +``` + +GTO creates a special Git tag for the artifact version, in +[the standard format](/doc/gto/user-guide#git-tags-format). + +The version is now associated to the current Git commit (`HEAD`). 
You can use +another Git commit if you provide its hexsha as an additional argument, like +`$ gto register awesome-model abc1234`. + +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] +- `--version `, `--ver ` - Version name in SemVer format +- `-m `, `--message ` - Message to annotate the Git tag with +- `--simple ` - Use simple notation, e.g. `rf#prod` instead of `rf#prod-5` + [supported values: auto, true, false] [default: auto] +- `--force` - Create the Git tag even if it already exists and is in effect +- `--bump-major` - Bump major version +- `--bump-minor` - Bump minor version +- `--bump-patch` - Bump patch version +- `--push` - Push created tag automatically (experimental) +- `-h`, `--help` - Show this message and exit. + +## Examples + + Register new version at HEAD: + $ gto register nn + + Register new version at a specific ref: + $ gto register nn abc1234 + + Assign version name explicitly: + $ gto register nn --version v1.0.0 + + Choose a part to bump version by: + $ gto register nn --bump-minor diff --git a/content/docs/gto/command-reference/remove.md b/content/docs/gto/command-reference/remove.md new file mode 100644 index 0000000000..bb673cd2a4 --- /dev/null +++ b/content/docs/gto/command-reference/remove.md @@ -0,0 +1,26 @@ +# remove + +Remove the enrichment for given artifact. + +## Synopsis + +```usage +usage: gto remove [-r ] [--commit] [--push] [-h] + name + +arguments: + name Artifact name +``` + +## Description + +This command removes the artifact annotation from `artifacts.yaml`. Don't forget +to commit the change. + +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] +- `--commit` - Automatically commit changes due to this command (experimental) +- `--push` - Push created commit automatically (experimental) - will set + commit=True +- `-h`, `--help` - Show this message and exit. 
diff --git a/content/docs/gto/command-reference/show.md b/content/docs/gto/command-reference/show.md new file mode 100644 index 0000000000..674e21bd77 --- /dev/null +++ b/content/docs/gto/command-reference/show.md @@ -0,0 +1,157 @@ +# show + +Show the registry state, highest version, or what's assigned in stage. + +## Synopsis + +```usage +usage: gto show [-r ] [-a] [-A] [--json] [--plain] + [--name] [--version] [--stage] [--ref] + [--ro] [--av ] [--vs ] + [--sort ] [-h] + [name] + +arguments: + [name] Artifact name to show. If empty, show registry +``` + +## Description + +This is the entire state of the registry: all artifacts, their latest versions, +and the versions in each stage. + +```cli +$ gto show -r https://github.com/iterative/example-gto +╒══════════╀══════════╀════════╀═════════╀════════════╕ +β”‚ name β”‚ latest β”‚ #dev β”‚ #prod β”‚ #staging β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•ͺ══════════β•ͺ════════β•ͺ═════════β•ͺ════════════║ +β”‚ churn β”‚ v3.1.1 β”‚ v3.1.1 β”‚ v3.0.0 β”‚ v3.1.0 β”‚ +β”‚ segment β”‚ v0.4.1 β”‚ v0.4.1 β”‚ - β”‚ - β”‚ +β”‚ cv-class β”‚ v0.1.13 β”‚ - β”‚ - β”‚ - β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•› +``` + +Here we'll see artifacts that have Git tags or are annotated in +`artifacts.yaml`. The artifacts that have annotation, but have no Git tags, are +considered yet `unregistered` and will be marked with an asterisk, e.g. +`*annotated`. Use `--all-branches` or `--all-commits` to read `artifacts.yaml` +from more commits than just `HEAD`. 
+ +Add an artifact name to print all of its versions instead: + +```cli +$ gto show churn +╒════════════╀═══════════╀══════════════╀═════════════════════╀══════════════╕ +β”‚ artifact β”‚ version β”‚ stage β”‚ created_at β”‚ ref β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•β•β•ͺ═══════════β•ͺ══════════════β•ͺ═════════════════════β•ͺ══════════════║ +β”‚ churn β”‚ v3.1.0 β”‚ dev, staging β”‚ 2022-08-28 16:58:50 β”‚ churn@v3.1.0 β”‚ +β”‚ churn β”‚ v3.0.0 β”‚ prod β”‚ 2022-08-24 01:52:10 β”‚ churn@v3.0.0 β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•› +``` + +Note, that by default, assignments are sorted by the creation time (the latest +assignment wins). You can sort them by Semver with `--sort semver` option (the +greatest version in stage wins). + +Finally, you can show the greatest version of the artifact, or what's currently +in a stage, using shortcuts like: + +```cli +$ gto show churn@greatest +╒════════════╀═══════════╀═════════╀═════════════════════╀══════════════╕ +β”‚ artifact β”‚ version β”‚ stage β”‚ created_at β”‚ ref β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•β•β•ͺ═══════════β•ͺ═════════β•ͺ═════════════════════β•ͺ══════════════║ +β”‚ churn β”‚ v3.1.1 β”‚ dev β”‚ 2022-11-09 13:40:33 β”‚ churn@v3.1.1 β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•› + +$ gto show churn#prod +╒════════════╀═══════════╀═════════╀═════════════════════╀══════════════╕ +β”‚ artifact β”‚ version β”‚ stage β”‚ created_at β”‚ ref β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•β•β•ͺ═══════════β•ͺ═════════β•ͺ═════════════════════β•ͺ══════════════║ +β”‚ churn β”‚ v3.0.0 β”‚ prod β”‚ 2022-10-28 23:53:53 β”‚ churn@v3.0.0 β”‚ 
+β•˜β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•› + +$ gto show churn@v3.0.0 +╒════════════╀═══════════╀═════════╀═════════════════════╀══════════════╕ +β”‚ artifact β”‚ version β”‚ stage β”‚ created_at β”‚ ref β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•β•β•ͺ═══════════β•ͺ═════════β•ͺ═════════════════════β•ͺ══════════════║ +β”‚ churn β”‚ v3.0.0 β”‚ prod β”‚ 2022-10-28 23:53:53 β”‚ churn@v3.0.0 β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•› +``` + +By default, GTO is configured to adhere to what we call "Environments" +mechanics: a single artifact version can be in multiple stages, but if you take +a specific stage, there will be only one version in it. + +There are two other approaches that you may want to use - see the details under +collapsible sections below. + +
+ +### Enable multiple versions in the same Stage workflow + +Note: this functionality is experimental and subject to change. If you find it +useful, please share your feedback in GH issues to help us make it stable. + +If you would like to see more than a single version assigned in a stage, use +`--vs` (short for `--versions-per-stage`), e.g. `-1` to show all versions. + +```cli +$ gto show churn --vs -1 +╒════════════╀═══════════╀══════════════╀═════════════════════╀══════════════╕ +β”‚ artifact β”‚ version β”‚ stage β”‚ created_at β”‚ ref β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•β•β•ͺ═══════════β•ͺ══════════════β•ͺ═════════════════════β•ͺ══════════════║ +β”‚ churn β”‚ v3.1.0 β”‚ dev, staging β”‚ 2022-08-28 16:58:50 β”‚ churn@v3.1.0 β”‚ +β”‚ churn β”‚ v3.0.0 β”‚ dev, prod β”‚ 2022-08-24 01:52:10 β”‚ churn@v3.0.0 β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•› +``` + +
+ +
+ +### Enable Kanban-like workflow + +Note: this functionality is experimental and subject to change. If you find it +useful, please share your feedback in GH issues to help us make it stable. + +If you would like the latest stage to replace all the previous stages for an +artifact version, use `--vs` flag combined with `--av` +(`--assignments-per-version` for short): + +```cli +$ gto show churn --av 1 --vs -1 +╒════════════╀═══════════╀═════════╀═════════════════════╀══════════════╕ +β”‚ artifact β”‚ version β”‚ stage β”‚ created_at β”‚ ref β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•β•β•ͺ═══════════β•ͺ═════════β•ͺ═════════════════════β•ͺ══════════════║ +β”‚ churn β”‚ v3.1.0 β”‚ staging β”‚ 2022-08-28 16:58:50 β”‚ churn@v3.1.0 β”‚ +β”‚ churn β”‚ v3.0.0 β”‚ dev β”‚ 2022-08-24 01:52:10 β”‚ churn@v3.0.0 β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•› +``` + +In this case the version will always have a single stage (or have no stage at +all). This resembles Kanban workflow, when you "move" your artifact version from +one column ("stage-1") to another ("stage-2"). This is how MLFlow and some other +Model Registries work. + +
+ +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] +- `-a`, `--all-branches` - Read heads from all branches +- `-A`, `--all-commits` - Read all commits +- `--json` - Print output in json format +- `--plain` - Print table in grep-able format +- `--name` - Show artifact name +- `--version` - Output artifact version +- `--stage` - Show artifact stage +- `--ref` - Show ref +- `--ro`, `--registered-only` - Show only registered versions +- `--av `, `--assignments-per-version ` - Show N last stages + for each version. -1 for all [default: -1] +- `--vs `, `--versions-per-stage ` - Show N last versions for + each stage. -1 for all. Applied after 'assignments-per-version' [default: 1] +- `--sort ` - Order assignments by timestamp or semver [default: + timestamp] +- `-h`, `--help` - Show this message and exit. diff --git a/content/docs/gto/command-reference/stages.md b/content/docs/gto/command-reference/stages.md new file mode 100644 index 0000000000..d9f4bd8e84 --- /dev/null +++ b/content/docs/gto/command-reference/stages.md @@ -0,0 +1,24 @@ +# stages + +Print list of stages used in the registry. + +## Synopsis + +```usage +usage: gto stages [-r ] [--allowed] [--used] [--json] + [-h] +``` + +## Description + +This command is used to get a list of stages used in the registry - whether +directly from [the config file](/doc/gto/user-guide#configuring-gto), or from +all GTO Git tags that exist. + +## Options + +- `-r `, `--repo ` - Local or remote repository [default: .] +- `--allowed` - Show allowed stages from config +- `--used` - Show stages that were ever used (from all git tags) +- `--json` - Print output in json format +- `-h`, `--help` - Show this message and exit. 
diff --git a/content/docs/gto/get-started.md b/content/docs/gto/get-started.md new file mode 100644 index 0000000000..6a7d2768b5 --- /dev/null +++ b/content/docs/gto/get-started.md @@ -0,0 +1,168 @@ +--- +description: + 'Learn how you can use GTO to create Artifact Registry in Git repository' +--- + +# Get Started + +GTO helps you manage machine learning artifact versions in a Git repository, and +their deployment stages (testing, shadow, production, etc.). + +Assuming GTO is already [installed](/doc/gto/install) in your Python +environment, let's clone an [example model registry] and review its current +state with `gto show`: + +[example model registry]: https://github.com/iterative/example-gto + +```cli +$ git clone https://github.com/iterative/example-gto +$ cd example-gto + +$ gto show +╒══════════╤══════════╤════════╤═════════╤════════════╕ +│ name │ latest │ #dev │ #prod │ #staging │ +╞══════════╪══════════╪════════╪═════════╪════════════╡ +│ churn │ v3.1.1 │ v3.1.1 │ v3.0.0 │ v3.1.0 │ +│ segment │ v0.4.1 │ v0.4.1 │ - │ - │ +│ cv-class │ v0.1.13 │ - │ - │ - │ +╘══════════╧══════════╧════════╧═════════╧════════════╛ +``` + +3 artifacts (models `churn`, `segment`, and `cv-class`) and their `latest` +versions (per [SemVer](https://semver.org)) are listed. We also have 3 stages: +`dev`, `prod`, and `staging`. The model versions (if any) assigned to each stage +are shown. + +## Registering a new version + +`gto register` lets you mark significant artifact versions (e.g. an ML model +release). 
Let's register a new version of `cv-class` and check the registry +status again: + +```cli +$ gto register cv-class +Created git tag 'cv-class@v0.1.14' that registers version +To push the changes upstream, run: + git push origin cv-class@v0.1.14 + +$ gto show +╒══════════╀══════════╀════════╀═════════╀════════════╕ +β”‚ name β”‚ latest β”‚ #dev β”‚ #prod β”‚ #staging β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•ͺ══════════β•ͺ════════β•ͺ═════════β•ͺ════════════║ +β”‚ churn β”‚ v3.1.1 β”‚ v3.1.1 β”‚ v3.0.0 β”‚ v3.1.0 β”‚ +β”‚ segment β”‚ v0.4.1 β”‚ v0.4.1 β”‚ - β”‚ - β”‚ +β”‚ cv-class β”‚ v0.1.14 β”‚ - β”‚ - β”‚ - β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•› +``` + +This creates a Git tag attached to the latest Git commit (`HEAD`) which bumps +the artifact's version automatically (in this case from `v0.1.13` to `v0.1.14`). + +## Assigning stages + +The version we just registered looks very promising. You can promote it with +`gto assign`, for example to the `dev` stage (for testing): + +```cli +$ gto assign cv-class --stage dev +Created git tag 'cv-class#dev#1' that assigns stage to version 'v0.1.14' +To push the changes upstream, run: + git push origin cv-class#dev#1 + +$ gto show +╒══════════╀══════════╀═════════╀═════════╀════════════╕ +β”‚ name β”‚ latest β”‚ #dev β”‚ #prod β”‚ #staging β”‚ +β•žβ•β•β•β•β•β•β•β•β•β•β•ͺ══════════β•ͺ═════════β•ͺ═════════β•ͺ════════════║ +β”‚ churn β”‚ v3.1.1 β”‚ v3.1.1 β”‚ v3.0.0 β”‚ v3.1.0 β”‚ +β”‚ segment β”‚ v0.4.1 β”‚ v0.4.1 β”‚ - β”‚ - β”‚ +β”‚ cv-class β”‚ v0.1.14 β”‚ v0.1.14 β”‚ - β”‚ - β”‚ +β•˜β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•§β•β•β•β•β•β•β•β•β•β•β•β•β•› +``` + +This also creates a Git tag, which associates the latest version of `cv-class` +(`v0.1.14`) to `dev`. 
+ +## Act in CI/CD upon registrations and assignments + +You may have noticed that `gto` reminds you how to `git push` the [tags] created +during registrations and promotions. The benefit of this Git-native mechanism +is that you can act upon GTO operations in any Git-based system downstream, for +example automating model deployments with CI/CD. + +[tags]: /doc/gto/user-guide#git-tags-format + +
+ +### Click to set up a Git remote you can push to. + + + +You'll need a [GitHub account](https://github.com/signup) for this. + + + +1. [Fork the example repo]. Make sure you uncheck "Copy the `main` branch only" + to preserve the repo's tags. + +2. Enable the [workflows] in your fork's **Settings** -> **Actions** page. Now + its [preconfigured jobs] will trigger when Git tags are pushed. + +[fork the example repo]: https://github.com/iterative/example-gto/fork +[workflows]: https://docs.github.com/en/actions/using-workflows/about-workflows +[preconfigured jobs]: + https://github.com/iterative/example-gto/blob/main/.github/workflows/gto-act-on-tags.yml + +3. Update your local repo's default remote (`origin`) with your fork (replace + `myuser` with your GitHub username): + + ```cli + $ git remote set-url origin https://github.com/myuser/example-gto + ``` + +
+ +To trigger your CI/CD workflows, you can push any of the Git tags created with +GTO, for example the [latest model version](#registering-a-new-version): + +```cli +$ git push origin cv-class@v0.1.14 +* [new tag] cv-class@v0.1.14 -> cv-class@v0.1.14 +``` + +Alternatively, GTO operations can target another `--repo` directly. Let's try +the [stage assignment](#assigning-stages) again for example, but on your remote: + +```cli +# Replace myuser with your GitHub user below. +$ gto assign cv-class --stage dev \ + --repo https://github.com/myuser/example-gto +Created git tag 'cv-class#dev#1' that assigns stage to version 'v0.1.14' +Running `git push origin cv-class#dev#1` +Successfully pushed git tag cv-class#dev#1 on remote. +``` + +Note that the tag is created locally first (if not present) and then pushed to +the target repo. + + + +To see what the example repo's [CI/CD jobs] look like, see its [GitHub Actions] +page. + +[ci/cd jobs]: + https://github.com/iterative/example-gto/blob/main/.github/workflows/gto-act-on-tags.yml +[github actions]: https://github.com/iterative/example-gto/actions + + + +## What's next? + +Thanks for completing this Get Started! + +- Learn how to + [get your artifacts](/doc/gto/user-guide#finding-the-right-artifact-version) when + you need them (e.g. get the latest version or the version in a specific stage). +- Learn more about [acting in CI/CD](/doc/gto/user-guide#acting-on-new-registrations-and-assignments) upon + version registrations and stage assignments. +- Reach out to us in [GH issues](https://github.com/iterative/gto/issues) or in + [Discord](https://discord.com/invite/dvwXA2N) to get your questions answered! diff --git a/content/docs/gto/index.md b/content/docs/gto/index.md new file mode 100644 index 0000000000..38ed5d5d98 --- /dev/null +++ b/content/docs/gto/index.md @@ -0,0 +1,41 @@ +# GTO Documentation + +**GTO** (Git Tag Ops) is a tool for creating an Artifact Registry in your Git +repository.
An important special case is a +[Machine Learning Model Registry](/doc/use-cases/model-registry). + +Such a registry serves as a centralized place to store and operationalize your +artifacts along with their metadata; manage artifact's life-cycle, versions & +releases, and easily automate tests and deployments using GitOps. + + + + + A step-by-step introduction into basic GTO features + + + + Study the detailed inner-workings of GTO in its user guide. + + + + Using GTO to build a Model Registry + + + + See all of GTO's commands + + + + +✅ Please join our [community](https://dvc.org/community) or use the +[support](https://dvc.org/support) channels if you have any questions or need +specific help. We are very responsive ⚡. + +✅ Check out our [GitHub repository](https://github.com/iterative/gto) and give +us a ⭐ if you like the project! + +✅ Contribute to GTO [on GitHub](https://github.com/iterative/gto) or help us +improve this +[documentation](https://github.com/iterative/dvc.org/tree/main/content/docs/gto) +🙏. diff --git a/content/docs/gto/install.md b/content/docs/gto/install.md new file mode 100644 index 0000000000..cf38da7ec7 --- /dev/null +++ b/content/docs/gto/install.md @@ -0,0 +1,31 @@ +# Installation + +You'll need [Python](https://www.python.org/) to install GTO, and +[Git](https://git-scm.com/) to use it. + +To check whether GTO is installed in your environment, run `which gto`. To check +which version is installed, run `gto --version`. + +## Install as a Python library + +GTO is distributed as a Python library, so it works on any OS. You can install +it with a package manager like [pip](https://pypi.org/project/pip/) or +[Conda](https://docs.conda.io/en/latest/). + + + +We **strongly** recommend creating a [virtual environment] or using [pipx] to +encapsulate your local environment.
+ +[virtual environment]: https://python.readthedocs.io/en/stable/library/venv.html +[pipx]: + https://packaging.python.org/guides/installing-stand-alone-command-line-tools/ + + + +```cli +$ pip install gto +``` + +This will install the [`gto`](/doc/gto/command-reference) command-line interface +(CLI) and make the Python API available for use in code. diff --git a/content/docs/gto/user-guide/index.md b/content/docs/gto/user-guide/index.md new file mode 100644 index 0000000000..6727af7272 --- /dev/null +++ b/content/docs/gto/user-guide/index.md @@ -0,0 +1,264 @@ +# User Guide + +GTO lets you build an Artifact Registry or [Model Registry] out of your Git +repository by creating annotated +[Git tags](https://git-scm.com/book/en/v2/Git-Basics-Tagging) with a +[special format](#git-tags-format). To read more about building a Model +Registry, read this [Studio User Guide]. + +[Model Registry]: http://dvc.org/doc/use-cases/model-registry +[Studio User Guide]: + https://dvc.org/doc/studio/user-guide/model-registry/what-is-a-model-registry + +## Finding the right artifact version + +You may need to get a specific artifact version to a certain environment, most +likely the latest one or the one currently assigned to the stage. Use `gto show` +to find the [Git reference] (tag) you need. + +[git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References + +Get the git tag for the latest version: + +```cli +$ gto show churn@latest --ref +churn@v3.1.1 +``` + +Get the git tag for the version in `prod` stage: + +```cli +$ gto show churn#prod --ref +churn@v3.0.0 +``` + +GTO doesn't provide a way to deliver the artifacts, but you can use DVC or any +method to retrieve files from the repo. With DVC, you can use [`dvc get`]: + +```cli +$ dvc get $REPO $ARTIFACT_PATH --rev $REVISION -o $OUTPUT_PATH +``` + + + +You can also use DVC with GTO to: + +- [Store large artifacts] (models and data) and track pointers to them in your + repo. 
+- [Keep artifact metadata] like the path or type (`model` or `dataset`). To see + an example, check out the [`example-gto` repo]. + + + +[`dvc get`]: https://dvc.org/doc/command-reference/get +[store large artifacts]: + https://dvc.org/doc/start/data-management/data-versioning +[keep artifact metadata]: + https://dvc.org/doc/user-guide/project-structure/dvcyaml-files#artifacts +[`example-gto` repo]: + https://github.com/iterative/example-gto/blob/main/dvc.yaml + +## Acting on new registrations and assignments + +A popular option to act on Git tags pushed in your repo is to set up CI/CD. To +see an example, check out +[the workflow in `example-gto` repo](https://github.com/iterative/example-gto/blob/main/.github/workflows/gto-act-on-tags.yml). +The workflow uses [the GTO GH Action](https://github.com/iterative/gto-action) +that fetches all Git tags (to correctly interpret the Registry), finds out the +`version` of the artifact that was registered, the `stage` that was assigned, +and annotations details such as `path`, `type`, `description`, etc, so you could +use them in the next steps of the CI. Note that it finds these annotation +details by +[reading `dvc.yaml` managed by DVC](/doc/gto/user-guide/#using-dvc-to-annotate-artifacts). + +If you're working with GitLab or BitBucket, feel free to create an issue asking +for a similar action, or submit yours for us to add to documentation. + +[env var in github actions]: + https://docs.github.com/en/actions/learn-github-actions/environment-variables + +
+ +### Other approaches: webhooks and polling Git forge API + +Besides using CI/CD, the other option is to +[configure webhooks](https://docs.github.com/en/rest/webhooks) that will send +HTTP requests to your server upon pushing Git tags to the remote. + +Besides, you can configure your server to query your Git provider via something +like REST API to check if changes happened. As an example, check out +[Github REST API](https://docs.github.com/en/rest). + +
+ +### CI/CD workflow examples + +We use MLEM in these examples, but you can use any other tool to build, publish +or deploy your models, or do any other action with your artifacts. + + + + +This workflow will build a docker image out of the model and push it to a +DockerHub. + +```yaml +# .github/workflows/build.yaml +on: + push: + tags: + - '*' + +jobs: + act: + name: Build a Docker image for new model versions + runs-on: ubuntu-latest + steps: + - name: Login to Docker Hub + uses: docker/login-action@v2 + # set credentials to login to DockerHub + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - uses: actions/checkout@v3 + - id: gto + uses: iterative/gto-action@v2 + - uses: actions/setup-python@v2 + - name: Install dependencies + run: | + pip install --upgrade pip setuptools wheel + pip install -r requirements.txt + - if: steps.gto.outputs.event == 'registration' + run: | + mlem build docker \ + --model '${{ steps.gto.outputs.path }}' \ + --image.name ${{ steps.gto.outputs.name }} \ + --image.tag '${{ steps.gto.outputs.version }}' \ + --image.registry docker_io +``` + +[Learn more](/doc/user-guide/building) about building Docker images, Python +packages or preparing `docker build`-ready folders from your models with MLEM. 
+ + + + +This workflow will deploy a model to Heroku upon stage assignment: + +```yaml +# .github/workflows/deploy.yaml +on: + push: + tags: + - '*' + +# set credentials to run deployment and save its state to s3 +env: + HEROKU_API_KEY: ${{ secrets.HEROKU_API_KEY }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + +jobs: + act: + name: Deploy a model upon stage assignment + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - id: gto + uses: iterative/gto-action@v2 + - uses: actions/setup-python@v2 + - name: Install dependencies + run: | + pip install --upgrade pip setuptools wheel + pip install -r requirements.txt + - if: steps.gto.outputs.event == 'assignment' + run: | + # TODO: check this works + mlem deployment run \ + --load deploy/${{ steps.gto.outputs.stage }} \ + --model ${{ steps.gto.outputs.path }} +``` + +This relies on having [deployment declarations](/doc/user-guide/deploying) in +the `deploy/` directory, such as: + +```yaml +# deploy/dev.yaml +object_type: deployment +type: heroku +app_name: mlem-dev +``` + +This declaration is read by MLEM in CI and the model promoted to `dev` is +deployed to https://mlem-dev.herokuapp.com. + +Note, that you need to provide environment variables to deploy to Heroku and +update the [deployment state](/doc/user-guide/deploying). The location for the +state should be +[configured](/doc/user-guide/deploying#setting-up-remote-state-manager) in MLEM +config file: + +```yaml +# .mlem.yaml +core: + state: + uri: s3://bucket/path +``` + +Check out [another example](https://github.com/iterative/example-gto/tree/mlem) +of MLEM model deployment in the `main` branch of the `example-gto` repo. 
+ + + +## Configuring GTO + +To configure GTO, use file `.gto` in the root of your repo: + +```yaml +# .gto config file +stages: [dev, stage, prod] # list of allowed Stages +``` + +When allowed Stages are specified, GTO will check commands you run and error out +if you provided a value that doesn't exist in the config. Note, that GTO applies +the config from the workspace, so if you want to apply the config from `main` +branch, you need to check it out first with `git checkout main`. + +Alternatively, you can use environment variables (note the `GTO_` prefix): + +```cli +$ GTO_EMOJIS=false gto show +``` + +## Git tags format + + + +You can work with GTO without knowing these conventions, since +[`gto` commands](/doc/gto/command-reference) take care of everything for you. + + + +All events have the standard formats of Git tags: + +- `{artifact_name}@{version_number}#{e}` for version registration. +- `{artifact_name}@{version_number}!#{e}` for version deregistration. +- `{artifact_name}#{stage}#{e}` for stage assignment. +- `{artifact_name}#{stage}!#{e}` for stage unassignment. +- `{artifact_name}@deprecated#{e}` for artifact deprecation. + +All of them share two parts: + +1. `{artifact_name}` prefix part. +2. `#{e}` counter at the end that can be omitted (in "simple" Git tag format). + +Generally, `#{e}` counter is used, because Git doesn't allow creating two Git +tags with the same name. If you want to have two Git tags that assign `dev` +stage to `model` artifact without the counter (`model#dev`), that will require +deleting the old Git tag first. Consequently, that doesn't allow you to preserve +history of events that happened. + +By default, `#{e}` sometimes is omitted, sometimes not. We are setting defaults +to omit using `#{e}` when it's rarely necessary, e.g. for version registrations +and artifact deprecations.
diff --git a/content/docs/gto/why-gto.md b/content/docs/gto/why-gto.md new file mode 100644 index 0000000000..39e3e9c25f --- /dev/null +++ b/content/docs/gto/why-gto.md @@ -0,0 +1,76 @@ +# Why GTO? + +**GTO** is a tool for creating an Artifact Registry in your Git repository. One +of the special cases we would like to highlight is creating a +[Machine Learning Model Registry](/doc/use-cases/model-registry). + +
+ +## Why do we need such a Registry? + +Such a registry serves as a centralized place to store and operationalize your +artifacts along with their metadata; manage model life-cycle, versions & +releases, and easily automate tests and deployments using GitOps. + +Usually, Artifact Registry usage follows these three steps: + +- **Registry**. Track new artifacts and their versions for releases and + significant changes. Usually this is needed for keeping track of lineage. +- **Lifecycle Management**. Create actionable stages for versions marking status + of artifact or its readiness to be consumed by a specific environment. +- **Downstream Usage**. Signal CI/CD automation or other downstream systems to + act upon these new versions and lifecycle updates. + +GTO helps you achieve all of them in a [GitOps](https://www.gitops.tech) way. If +you would like to see an example, please follow +[Get Started](/doc/gto/get-started). + +
+ +In Software Engineering, Git is at the heart of the software system. The code is +committed to Git and CI/CD triggers on new commits making the downstream action +necessary. Such approaches as [GitOps](https://www.gitops.tech) made huge steps +towards automation of development cycles, reducing errors and helping maintain +productive software development. + +Artifact Registries (and Model Registries in particular) usually introduce a +separate service or infrastructure, as well as a new set of APIs to integrate +with. This often leads to a necessity to maintain two different systems, which +is a significant overhead. For example, if you work in Machine Learning, you +often need two teams (Data Science specialists and Software Engineers) each +responsible for maintaining their part of the system. + +![](https://i.imgur.com/GTcrytE.png) + +GTO builds that on top of a Git repository using Git tags to register versions and +assign stages, and using `artifacts.yaml` file to keep the metainformation about +artifacts, such as `path`, `type`, `description`, etc. If your artifact +development is built around Git, you won't need to introduce new things for your +team to manage. + +One example (although specific to Model Registry) is really good at +demonstrating this problem of handling two worlds at the same time. When you +train your Machine Learning models, you have to know what code and data was used +to do it. If Model Registry lives in a separate system, you (or the code you've +written) have to record the code and data snapshots (or just a Git commit +hexsha). Now if you forgot to record the hexsha when you registered a new model +version in Model Registry, or used an incorrect hexsha, no one can reproduce +your training process. Keeping track of both models and their versions in Git +solves that problem.
+ +![](https://i.imgur.com/gViAnOu.png) + +## Limitations + +There are few limitations to the GTO approach to building an Artifact Registry: + +- You shouldn't commit artifact binaries to Git itself. You should use Git-lfs, + or use DVC and other similar tools. +- Some teams develop artifacts (models) in a single monorepository, sometimes in + many separate ones. Since GTO operates with Git tags and files in a Git + Repository, it can't handle multiple repositories at a single time. +- GTO is a command-line and Python API tool. That makes it friendly for + engineers, although for less technical folks a Visual UI may be required. + +If you hit the last two limitations, you may find +[Studio](https://dvc.org/doc/studio) useful. diff --git a/content/docs/sidebar.json b/content/docs/sidebar.json index fda47b7801..af7e87129a 100644 --- a/content/docs/sidebar.json +++ b/content/docs/sidebar.json @@ -865,5 +865,74 @@ "label": "Environment Variables" } ] + }, + { + "slug": "gto", + "label": "GTO", + "source": "gto/index.md", + "children": [ + { + "slug": "install", + "source": "install.md", + "label": "Installation" + }, + { + "slug": "get-started", + "label": "Get Started", + "source": "get-started.md" + }, + { + "slug": "user-guide", + "label": "User Guide", + "source": "user-guide/index.md" + }, + { + "slug": "command-reference", + "label": "Command Reference", + "source": "command-reference/index.md", + "children": [ + { + "slug": "assign", + "source": "assign.md", + "label": "assign" + }, + { + "slug": "check-ref", + "source": "check-ref.md", + "label": "check-ref" + }, + { + "slug": "deprecate", + "source": "deprecate.md", + "label": "deprecate" + }, + { + "slug": "doctor", + "source": "doctor.md", + "label": "doctor" + }, + { + "slug": "history", + "source": "history.md", + "label": "history" + }, + { + "slug": "register", + "source": "register.md", + "label": "register" + }, + { + "slug": "show", + "source": "show.md", + "label": "show" + }, + { + "slug": "stages", 
+ "source": "stages.md", + "label": "stages" + } + ] + } + ] } ] diff --git a/content/docs/studio/troubleshooting.md b/content/docs/studio/troubleshooting.md index ed51706841..36b97729fb 100644 --- a/content/docs/studio/troubleshooting.md +++ b/content/docs/studio/troubleshooting.md @@ -391,8 +391,8 @@ Check out the [Frequently Asked Questions](https://studio.iterative.ai/faq) to see if your questions have already been answered. If you still have problems, please [contact us](#support). -[gto]: https://mlem.ai/doc/gto +[gto]: /doc/gto [register the model]: /doc/studio/user-guide/model-registry/add-a-model -[`gto` cli]: https://mlem.ai/doc/gto/command-reference +[`gto` cli]: /doc/gto/command-reference [create a project]: /doc/studio/user-guide/projects-and-experiments/create-a-project diff --git a/content/docs/studio/user-guide/model-registry/add-a-model.md b/content/docs/studio/user-guide/model-registry/add-a-model.md index fa68466afc..1d073234fa 100644 --- a/content/docs/studio/user-guide/model-registry/add-a-model.md +++ b/content/docs/studio/user-guide/model-registry/add-a-model.md @@ -71,7 +71,7 @@ before the commit is created: [connected project]: /doc/studio/user-guide/projects-and-experiments/create-a-project -[gto]: https://mlem.ai/doc/gto +[gto]: /doc/gto [mlem]: https://mlem.ai/ [dvclive]: /doc/dvclive [log_artifact]: /doc/dvclive/live/log_artifact diff --git a/content/docs/studio/user-guide/model-registry/assign-stage.md b/content/docs/studio/user-guide/model-registry/assign-stage.md index fad6a35e08..ca7964dcf2 100644 --- a/content/docs/studio/user-guide/model-registry/assign-stage.md +++ b/content/docs/studio/user-guide/model-registry/assign-stage.md @@ -65,12 +65,12 @@ If you go to your Git repository, you will see that a new Git tag referencing the selected version and stage has been created, indicating the stage assignment. 
-[gto]: https://mlem.ai/doc/gto +[gto]: /doc/gto [git tag]: https://git-scm.com/docs/git-tag -[gto-format]: https://mlem.ai/doc/gto/user-guide#git-tags-format +[gto-format]: /doc/gto/user-guide#git-tags-format [CI/CD]: /doc/studio/user-guide/model-registry/use-models#deploying-and-publishing-models-in-cicd [MLEM]: https://mlem.ai/ -[assign]: https://mlem.ai/doc/gto/command-reference/assign +[assign]: /doc/gto/command-reference/assign [monorepo]: https://github.com/iterative/monorepo-example/blob/add-cv-model/cv/dvc.yaml diff --git a/content/docs/studio/user-guide/model-registry/register-version.md b/content/docs/studio/user-guide/model-registry/register-version.md index 438acf3743..11f4154b94 100644 --- a/content/docs/studio/user-guide/model-registry/register-version.md +++ b/content/docs/studio/user-guide/model-registry/register-version.md @@ -60,14 +60,14 @@ available in the model `History` section as well as in the versions drop down. If you go to your Git repository, you will see that a new Git tag referencing the selected commit has been created, representing the new version. 
-[gto]: https://mlem.ai/doc/gto +[gto]: /doc/gto [git tag]: https://git-scm.com/docs/git-tag -[gto-format]: https://mlem.ai/doc/gto/user-guide#git-tags-format -[`gto` cli]: https://mlem.ai/doc/gto/command-reference +[gto-format]: /doc/gto/user-guide#git-tags-format +[`gto` cli]: /doc/gto/command-reference [semver]: https://semver.org/ [CI/CD]: /doc/studio/user-guide/model-registry/use-models#deploying-and-publishing-models-in-cicd [MLEM]: https://mlem.ai/ -[register]: https://mlem.ai/doc/gto/command-reference/register +[register]: /doc/gto/command-reference/register [monorepo]: https://github.com/iterative/monorepo-example/blob/add-cv-model/cv/dvc.yaml diff --git a/content/docs/studio/user-guide/model-registry/remove-a-model-or-its-details.md b/content/docs/studio/user-guide/model-registry/remove-a-model-or-its-details.md index 77da8641af..f0cd378699 100644 --- a/content/docs/studio/user-guide/model-registry/remove-a-model-or-its-details.md +++ b/content/docs/studio/user-guide/model-registry/remove-a-model-or-its-details.md @@ -2,7 +2,7 @@ When you remove (deprecate) a model, deregister a version or unassign a stage, Iterative Studio -[creates Git tags that indicate the action](https://mlem.ai/doc/gto/command-reference/deprecate) +[creates Git tags that indicate the action](/doc/gto/command-reference/deprecate) and saves the tags in your Git repository. These actions can be found in the 3-dot menu next to the model name in the diff --git a/content/docs/studio/user-guide/model-registry/use-models.md b/content/docs/studio/user-guide/model-registry/use-models.md index 82127ffdf7..eb0cf85abc 100644 --- a/content/docs/studio/user-guide/model-registry/use-models.md +++ b/content/docs/studio/user-guide/model-registry/use-models.md @@ -34,7 +34,7 @@ written. 
To find out the Git revision, you can check the [list of Git tags](https://github.com/iterative/example-get-started/tags) or use -[GTO](https://mlem.ai/doc/gto/command-reference/show/): +[GTO](/doc/gto/command-reference/show/): ```cli $ gto show text-classification@latest --ref @@ -78,4 +78,4 @@ from CI/CD using MLEM. Finally, you can find examples of building a Docker image with a model and deploying it to the cloud in the -[GTO user guide](https://mlem.ai/doc/gto/user-guide#acting-on-new-registrations-and-assignments). +[GTO user guide](/doc/gto/user-guide#acting-on-new-registrations-and-assignments). diff --git a/content/docs/studio/user-guide/model-registry/view-and-compare-models.md b/content/docs/studio/user-guide/model-registry/view-and-compare-models.md index 0d5ef82e70..3f999bd307 100644 --- a/content/docs/studio/user-guide/model-registry/view-and-compare-models.md +++ b/content/docs/studio/user-guide/model-registry/view-and-compare-models.md @@ -19,13 +19,6 @@ framework, repository, etc. Iterative Studio consolidates the stages of all the models in the registry, and provides a way to filter models by stages. -Iterative Studio also consolidates the frameworks of all the models in the -registry and provides a way to filter models by framework. Note that the -framework of a model is identified by Iterative’s model deployment tool [MLEM]. -If you have not used MLEM, then Iterative Studio will use a generic framework -icon to indicate that the model framework was not identified, and that the model -was registered using [GTO]. - You can take a look at the [models dashboard] in Iterative's public (read only) model registry. @@ -57,11 +50,7 @@ A model details page is divided into the following sections: - all assigned stages, - version description and labels, - path to the model, - - metrics, params and plots, and - - model requirements and methods. Note that the requirements and methods are - identified by Iterative’s model deployment tool [MLEM].
If you have not used - MLEM, then Iterative Studio will not display any requirements or methods for - the model. + - metrics, params and plots. - Right section: The right section contains information that is applicable across all the versions of the model. In particular, it displays - the assigned stages for the different versions, and @@ -89,8 +78,7 @@ This way you can compare both registered model versions and unregistered experimental iterations and make a decision to register a new version out of the latter. -[mlem]: https://mlem.ai/ -[gto]: https://mlem.ai/doc/gto +[gto]: /doc/gto [models dashboard]: https://studio.iterative.ai/team/Iterative/models [model detail page]: https://studio.iterative.ai/team/Iterative/models/PTzV-9EJgmZ6TGspXtwKqw==/lightgbm-model/v2.0.1 diff --git a/content/docs/studio/user-guide/model-registry/what-is-a-model-registry.md b/content/docs/studio/user-guide/model-registry/what-is-a-model-registry.md index 469a241dc3..351eb28a94 100644 --- a/content/docs/studio/user-guide/model-registry/what-is-a-model-registry.md +++ b/content/docs/studio/user-guide/model-registry/what-is-a-model-registry.md @@ -21,8 +21,7 @@ You can take a look at Iterative's [public model registry] (read only) to get a feel for what's possible. 
[semantic versioning]: https://semver.org/ -[gto]: https://mlem.ai/doc/gto -[mlem]: https://mlem.ai/ -[`gto` cli]: https://mlem.ai/doc/gto/command-reference +[gto]: /doc/gto +[`gto` cli]: /doc/gto/command-reference [public model registry]: https://studio.iterative.ai/team/Iterative/models [dvc]: /doc diff --git a/content/docs/studio/user-guide/projects-and-experiments/what-is-a-project.md b/content/docs/studio/user-guide/projects-and-experiments/what-is-a-project.md index 2d1a724d9b..9ccdd559d9 100644 --- a/content/docs/studio/user-guide/projects-and-experiments/what-is-a-project.md +++ b/content/docs/studio/user-guide/projects-and-experiments/what-is-a-project.md @@ -46,7 +46,7 @@ Studio if you configure the CI job accordingly. [Learn more][live-metrics-and-plots] To **add model metadata** to your repositories, you can use Iterative Studio -Model Registry, or the underlying [GTO] or [MLEM]. [Learn more][model-registry] +Model Registry, or the underlying DVC. [Learn more][model-registry] [on project settings]: /doc/studio/user-guide/projects-and-experiments/configure-a-project#non-dvc-repositories @@ -63,8 +63,6 @@ Model Registry, or the underlying [GTO] or [MLEM]. [Learn more][model-registry] [live-metrics-and-plots]: /doc/studio/user-guide/projects-and-experiments/live-metrics-and-plots [dvclive]: /doc/dvclive -[gto]: https://mlem.ai/doc/gto -[mlem]: https://mlem.ai/ [store and share your data and model files]: /doc/start/data-management/data-versioning#storing-and-sharing [create data registries]: /doc/use-cases/data-registry diff --git a/content/docs/use-cases/model-registry.md b/content/docs/use-cases/model-registry.md index 3136cc5b21..71ac9b3d94 100644 --- a/content/docs/use-cases/model-registry.md +++ b/content/docs/use-cases/model-registry.md @@ -8,9 +8,6 @@ more division between ML engineering and operations! 
![](/img/ml_model_registry.jpg) _MLOps from modeling to production_ -[gto]: https://mlem.ai/doc/gto -[mlem]: https://mlem.ai/ - Model registry enables end-to-end workflows: - **Log your model**: Start by logging your model's performance metrics and diff --git a/src/@dvcorg/gatsby-theme-iterative/components/LayoutFooter/index.tsx b/src/@dvcorg/gatsby-theme-iterative/components/LayoutFooter/index.tsx index b342bbf404..685d3779b5 100644 --- a/src/@dvcorg/gatsby-theme-iterative/components/LayoutFooter/index.tsx +++ b/src/@dvcorg/gatsby-theme-iterative/components/LayoutFooter/index.tsx @@ -16,7 +16,6 @@ import { ReactComponent as DiscordSVG } from '@dvcorg/gatsby-theme-iterative/src import { ReactComponent as CmlSVG } from '../../../../../static/img/cml_icon-color--square_vector.svg' import { ReactComponent as StudioSVG } from '../../../../../static/img/studio_icon-color--square_vector.svg' import { ReactComponent as IterativeSVG } from '../../../../../static/img/iterative_icon-color--square_vector.svg' -import { ReactComponent as MlemSVG } from '../../../../../static/img/mlem-icon.svg' import * as styles from '@dvcorg/gatsby-theme-iterative/src/components/LayoutFooter/styles.module.css' @@ -121,12 +120,6 @@ const footerListsData: Array = [ icon: , target: '_blank' }, - { - href: 'https://mlem.ai/', - text: 'MLEM', - icon: , - target: '_blank' - }, { href: 'https://marketplace.visualstudio.com/items?itemName=Iterative.dvc', text: 'VS Code Extension', diff --git a/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/Popup/styles.module.css b/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/Popup/styles.module.css index bd12f6db13..b544187717 100644 --- a/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/Popup/styles.module.css +++ b/src/@dvcorg/gatsby-theme-iterative/components/LayoutHeader/Nav/Popup/styles.module.css @@ -47,10 +47,6 @@ background-image: url('../../../../../../../static/img/studio_icon-color--square_vector.svg'); } 
-.mlemIcon { - background-image: url('../../../../../../../static/img/mlem-icon.svg'); -} - .vscodeIcon { background-image: url('../../../../../../../static/img/vscode-icon.svg'); } diff --git a/src/@dvcorg/gatsby-theme-iterative/data/menu.ts b/src/@dvcorg/gatsby-theme-iterative/data/menu.ts index 557f4ace9f..8acfb828c9 100644 --- a/src/@dvcorg/gatsby-theme-iterative/data/menu.ts +++ b/src/@dvcorg/gatsby-theme-iterative/data/menu.ts @@ -156,15 +156,6 @@ const menuData: IMenuData = { href: 'https://cml.dev/', img: '/img/cml_icon-color--square_vector.svg', imgAlt: 'CML logo' - }, - { - title: 'MLEM', - description: - 'Open-source model registry and deployment tool for ML projects', - iconClass: styles.mlemIcon, - href: 'https://mlem.ai/', - img: '/img/mlem-icon.svg', - imgAlt: 'MLEM logo' } ] } diff --git a/src/@dvcorg/gatsby-theme-iterative/data/styles.module.css b/src/@dvcorg/gatsby-theme-iterative/data/styles.module.css index 88275a3a81..899e762d09 100644 --- a/src/@dvcorg/gatsby-theme-iterative/data/styles.module.css +++ b/src/@dvcorg/gatsby-theme-iterative/data/styles.module.css @@ -10,10 +10,6 @@ background-image: url('../../../../static/img/studio_icon-color--square_vector.svg'); } -.mlemIcon { - background-image: url('../../../../static/img/mlem-icon.svg'); -} - .vscodeIcon { background-image: url('../../../../static/img/vscode-icon.svg'); } From fe191da9c7d3e32f2679fc5e9f3c757e97a5cdf6 Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Thu, 31 Aug 2023 12:44:33 +0200 Subject: [PATCH 2/3] one more --- content/docs/studio/user-guide/model-registry/add-a-model.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/content/docs/studio/user-guide/model-registry/add-a-model.md b/content/docs/studio/user-guide/model-registry/add-a-model.md index 1d073234fa..06fc52885e 100644 --- a/content/docs/studio/user-guide/model-registry/add-a-model.md +++ b/content/docs/studio/user-guide/model-registry/add-a-model.md @@ -32,8 +32,8 
@@ https://www.youtube.com/watch?v=szzv4ZXmYAs 4. Enter the path of the model file as follows: - If the model file is in the Git repository or is in the cloud but is - tracked by DVC and/or [MLEM], enter the relative path of the model (from - the repository root). + tracked by DVC, enter the relative path of the model (from the repository + root). - Otherwise, enter the URL to the model file in the cloud. Iterative Studio will ask you for the repository path where the dvc reference to the model should be saved. @@ -72,7 +72,6 @@ before the commit is created: [connected project]: /doc/studio/user-guide/projects-and-experiments/create-a-project [gto]: /doc/gto -[mlem]: https://mlem.ai/ [dvclive]: /doc/dvclive [log_artifact]: /doc/dvclive/live/log_artifact [`artifacts`]: /doc/user-guide/project-structure/dvcyaml-files#artifacts From d2e34dd254fca70f7f0f37921c732c7458debd6d Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Fri, 1 Sep 2023 15:50:05 +0200 Subject: [PATCH 3/3] fix missing links --- content/docs/gto/user-guide/index.md | 48 +++------------------------- 1 file changed, 4 insertions(+), 44 deletions(-) diff --git a/content/docs/gto/user-guide/index.md b/content/docs/gto/user-guide/index.md index 6727af7272..c27f7845b2 100644 --- a/content/docs/gto/user-guide/index.md +++ b/content/docs/gto/user-guide/index.md @@ -93,9 +93,6 @@ like REST API to check if changes happened. As an example, check out ### CI/CD workflow examples -We use MLEM in these examples, but you can use any other tool to build, publish -or deploy your models, or do any other action with your artifacts. - @@ -130,16 +127,10 @@ jobs: pip install -r requirements.txt - if: steps.gto.outputs.event == 'registration' run: | - mlem build docker \ - --model '${{ steps.gto.outputs.path }}' \ - --image.name ${{ steps.gto.outputs.name }} \ - --image.tag '${{ steps.gto.outputs.version }}' \ - --image.registry docker_io + # build docker image and push it to Dockerhub + ... 
``` -[Learn more](/doc/user-guide/building) about building Docker images, Python -packages or preparing `docker build`-ready folders from your models with MLEM. - @@ -173,41 +164,10 @@ jobs: pip install -r requirements.txt - if: steps.gto.outputs.event == 'assignment' run: | - # TODO: check this works - mlem deployment run \ - --load deploy/${{ steps.gto.outputs.stage }} \ - --model ${{ steps.gto.outputs.path }} -``` - -This relies on having [deployment declarations](/doc/user-guide/deploying) in -the `deploy/` directory, such as: - -```yaml -# deploy/dev.yaml -object_type: deployment -type: heroku -app_name: mlem-dev + # deploy your model here + ... ``` -This declaration is read by MLEM in CI and the model promoted to `dev` is -deployed to https://mlem-dev.herokuapp.com. - -Note, that you need to provide environment variables to deploy to Heroku and -update the [deployment state](/doc/user-guide/deploying). The location for the -state should be -[configured](/doc/user-guide/deploying#setting-up-remote-state-manager) in MLEM -config file: - -```yaml -# .mlem.yaml -core: - state: - uri: s3://bucket/path -``` - -Check out [another example](https://github.com/iterative/example-gto/tree/mlem) -of MLEM model deployment in the `main` branch of the `example-gto` repo. -