From 25a4bcc08e61a6e5d2c0d99e3f196231942b91c9 Mon Sep 17 00:00:00 2001 From: Casey Marshall Date: Fri, 20 Oct 2023 16:52:53 -0500 Subject: [PATCH 1/4] chore: set up mkdocs Add mkdocs configuration and relocate the project README to the `docs/` subdirectory, bringing it under the control of mkdocs. This README will still be shown on the project main page per https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-readmes#about-readmes --- README.md => docs/README.md | 0 mkdocs.yml | 9 +++++++++ pyproject.toml | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) rename README.md => docs/README.md (100%) create mode 100644 mkdocs.yml diff --git a/README.md b/docs/README.md similarity index 100% rename from README.md rename to docs/README.md diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..d6a21b9 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,9 @@ +site_name: Snyk Tags Tool +nav: + - Home: README.md + +theme: + name: readthedocs + highlightjs: true +plugins: + - search diff --git a/pyproject.toml b/pyproject.toml index 0000259..584f388 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ version = "2.2.1" description = "CLI tool designed to manage tags and attributes at scale" authors = ["EricFernandezSnyk "] license = "MIT" -readme = "README.md" +readme = "docs/README.md" repository = "https://github.com/snyk-labs/snyk-tags-tool" keywords =["snyk"] From a3cb8d93ce041b18c6d65aec8dd457d769740adb Mon Sep 17 00:00:00 2001 From: Casey Marshall Date: Fri, 20 Oct 2023 17:08:13 -0500 Subject: [PATCH 2/4] chore: restructure and improve components documentation Split out components documentation to a separate, more focused page. 
--- docs/README.md | 124 ++------------------------------------------- docs/components.md | 119 +++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 3 files changed, 125 insertions(+), 119 deletions(-) create mode 100644 docs/components.md diff --git a/docs/README.md b/docs/README.md index 0dc2009..950460e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -10,6 +10,7 @@ - Help filter Snyk projects by applying attributes to a target import (for example a git repo like **snyk-labs/nodejs-goof**) - using ```snyk-tags target attributes``` or from a csv/json file with ```snyk-tags fromfile target-attributes``` - Help filter Snyk projects by adding the GitHub CODEOWNERS (only GitHub handles) as tags to the target import (must be a GitHub repo in the form **snyk-labs/nodejs-goof**) - using ```snyk-tags target github owners``` - Help with tag management by removing tags from a Group or a target import (for example a git repo like **snyk-labs/nodejs-goof**) - using ```snyk-tags target remove``` or listing all tags using ```snyk-tags list tags``` (also in bulk or from a csv/json file with ```snyk-tags fromfile```) +- [Associate Snyk Open Source, Code and Container projects](https://docs.snyk.io/manage-risk/insights/insights-setup/insights-setup-associating-snyk-open-source-code-and-container-projects#examples-of-project-tags) with software component tags, using ```snyk-tags component tag```. ### **snyk-tags tag** @@ -54,6 +55,10 @@ To import GitHub metadata such as CODEOWNERS or Topics, you can use this command Once you run ```snyk-tags```, go into the UI, naviagate to the projects page and find the tags filter or attribute filter options on the left-hand menu. Select the tag/attribute you have applied and you will see all projects associated. +### **snyk-tags component tag** + +```snyk-tags component tag``` automates tagging software components at scale for Snyk, based on powerful regular-expression based rules. 
Read more about this feature in [components](components.md). + ## **Installation and requirements** ### **Requirements** @@ -145,52 +150,6 @@ I want to filter all projects within ```snyk-labs/nodejs-goof``` and ```snyk-lab snyk-tags fromfile target-tag --file=path/to/file.csv --snyktkn ``` -### Defining software component tags for Snyk Insights - -I want to add `component` tags on projects in my Snyk Organization for [Snyk Insights](https://docs.snyk.io/manage-issues/insights/insights-setup/insights-setup-associating-snyk-open-source-code-and-container-projects), based on rules which match and extract certain features of project and attributes. See section on [Component Tags](#component-tags-for-snyk-insights) below. - -```bash -snyk-tags component tag --org-id=abc rules.yaml -``` - -I want to preview component tag processing changes before applying them. - -```bash -snyk-tags component tag --dry-run rules.yaml -``` - -I want to remove all component tags, as determined by the same rules. - -```bash -snyk-tags component tag --remove rules.yaml -``` - -I want to replace _all_ component tags that might exist on matching projects with only those specified by the rules. - -```bash -snyk-tags component tag --exclusive rules.yaml -``` - -I want to remove _all_ component tags from matching projects. - -```bash -snyk-tags component tag --remove --exclusive rules.yaml -``` - -#### Formatting options - -I want to store a CSV report of component tag rule processing to a file. - -```bash -snyk-tags component tag --format csv rules.yaml | tee component-tags.csv -``` - -I want to append a newline-delimited JSON (ndjson) log of component tag processing to a file. 
- -```bash -snyk-tags component tag --format json rules.yaml | tee -a component-tags.ndjson -``` - ## Types of projects and attributes ### List of all project types @@ -230,76 +189,3 @@ snyk-tags component tag --format json rules.yaml | tee -a component-tags.ndjson | | onprem | | | | hosted | | | | distributed | | - -## Component tags for Snyk Insights - -Part of the setup process for Snyk Insights involves associating Snyk Open Source, Code and Container projects together. For a large organization this can be a daunting task. The `snyk-tags component tag` command allows automating the application of such tags based on regular expression matching and extraction. - -This tool may be run up-front as part of onboarding, but also as a regular batch job. This allows component tags to be more centrally managed across an organization. - -### Component rules - -The format for the rules file is as follows: - -```yaml -# 'version' is the version of this rules file format, currently 1 -version: 1 - -# 'rules' is an array of rule objects. -# Rule objects are evaluated against each project in the specified --org-id -# The first rule that matches is used to tag the project with its component: tag. -# Rules are applied in the order in which they appear in this file. -rules: - - # A rule which normalizes component names across different types of projects. - # If you inspect the contents of Snyk projects, you'll find that different - # origins contain different identifiers and in different formats. - - name: my-retail-store - - # 'projects' is a list of project matchers. Just like the rules, these are - # applied in the order in which they are defined here, the first one that - # matches is used to extract variables used in the component expression - # below. - projects: - - # A project matcher which evaluates a regular expression against the - # project's 'name' attribute. If it matches, the named capture group - # "service_name" is stored as a variable. 
- - name: - regex: '^my-retail-store/(?P<service_name>\w+):' - # This matcher only applies to projects from Snyk's Github integration - origin: github - - # A project matcher which extracts service_name from a container image - # project. - - name: - regex: '^(?P<service_name>\w+):' - origin: ecr - - # A project matcher which matches and extracts from the target - # relationship. - - target: - url: - regex: 'http://github.com/my-retail-store/(?P<service_name>\w+)\.git' - origin: cli - - # Define the component tag for all matching projects. Snyk recommends a - # Package URL (pURL) format beginning with `pkg:` for use with Insights. - # Named capture values extracted in the matchers above may be interpolated - # here as variables, using Python's fstring formatting convention. - # - # Note that if a variable is used, the named capture must be present in all - # project matchers defined above. - component: 'pkg:github/my-retail-store/{service_name}@main' -``` - -Matchers operate on objects which are simplified from Projects API responses. Only these fields are supported -- though note that not all projects set all of these fields. The fields are shown below in YAML format, commented with their mapping from Projects REST API resources. - -``` -- name: '...' # from data.attributes.name - origin: '...' # from data.attributes.origin - target: - display_name: '...' # from relationships.target.data.attributes.display_name - url: '...' # from relationships.target.data.attributes.url - target_reference: '...' 
# from data.attributes.target_reference -``` diff --git a/docs/components.md b/docs/components.md new file mode 100644 index 0000000..c500124 --- /dev/null +++ b/docs/components.md @@ -0,0 +1,119 @@ + +### Defining software component tags for Snyk Insights + +I want to add `component` tags on projects in my Snyk Organization for [Snyk Insights](https://docs.snyk.io/manage-issues/insights/insights-setup/insights-setup-associating-snyk-open-source-code-and-container-projects), based on rules which match and extract certain features of project and attributes. See section on [Component Tags](#component-tags-for-snyk-insights) below. + +```bash +snyk-tags component tag --org-id=abc rules.yaml +``` + +I want to preview component tag processing changes before applying them. + +```bash +snyk-tags component tag --dry-run rules.yaml +``` + +I want to remove all component tags, as determined by the same rules. + +```bash +snyk-tags component tag --remove rules.yaml +``` + +I want to replace _all_ component tags that might exist on matching projects with only those specified by the rules. + +```bash +snyk-tags component tag --exclusive rules.yaml +``` + +I want to remove _all_ component tags from matching projects. + +```bash +snyk-tags component tag --remove --exclusive rules.yaml +``` + +#### Formatting options + +I want to store a CSV report of component tag rule processing to a file. + +```bash +snyk-tags component tag --format csv rules.yaml | tee component-tags.csv +``` + +I want to append a newline-delimited JSON (ndjson) log of component tag processing to a file. + +```bash +snyk-tags component tag --format json rules.yaml | tee -a component-tags.ndjson +``` + +## Component tags for Snyk Insights + +Part of the setup process for Snyk Insights involves associating Snyk Open Source, Code and Container projects together. For a large organization this can be a daunting task. 
The `snyk-tags component tag` command allows automating the application of such tags based on regular expression matching and extraction. + +This tool may be run up-front as part of onboarding, but also as a regular batch job. This allows component tags to be more centrally managed across an organization. + +### Component rules + +The format for the rules file is as follows: + +```yaml +# 'version' is the version of this rules file format, currently 1 +version: 1 + +# 'rules' is an array of rule objects. +# Rule objects are evaluated against each project in the specified --org-id +# The first rule that matches is used to tag the project with its component: tag. +# Rules are applied in the order in which they appear in this file. +rules: + + # A rule which normalizes component names across different types of projects. + # If you inspect the contents of Snyk projects, you'll find that different + # origins contain different identifiers and in different formats. + - name: my-retail-store + + # 'projects' is a list of project matchers. Just like the rules, these are + # applied in the order in which they are defined here, the first one that + # matches is used to extract variables used in the component expression + # below. + projects: + + # A project matcher which evaluates a regular expression against the + # project's 'name' attribute. If it matches, the named capture group + # "service_name" is stored as a variable. + - name: + regex: '^my-retail-store/(?P<service_name>\w+):' + # This matcher only applies to projects from Snyk's Github integration + origin: github + + # A project matcher which extracts service_name from a container image + # project. + - name: + regex: '^(?P<service_name>\w+):' + origin: ecr + + # A project matcher which matches and extracts from the target + # relationship. + - target: + url: + regex: 'http://github.com/my-retail-store/(?P<service_name>\w+)\.git' + origin: cli + + # Define the component tag for all matching projects. 
Snyk recommends a + # Package URL (pURL) format beginning with `pkg:` for use with Insights. + # Named capture values extracted in the matchers above may be interpolated + # here as variables, using Python's fstring formatting convention. + # + # Note that if a variable is used, the named capture must be present in all + # project matchers defined above. + component: 'pkg:github/my-retail-store/{service_name}@main' +``` + +Matchers operate on objects which are simplified from Projects API responses. Only these fields are supported -- though note that not all projects set all of these fields. The fields are shown below in YAML format, commented with their mapping from Projects REST API resources. + +``` +- name: '...' # from data.attributes.name + origin: '...' # from data.attributes.origin + target: + display_name: '...' # from relationships.target.data.attributes.display_name + url: '...' # from relationships.target.data.attributes.url + target_reference: '...' # from data.attributes.target_reference +``` diff --git a/mkdocs.yml b/mkdocs.yml index d6a21b9..704d999 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,6 +1,7 @@ site_name: Snyk Tags Tool nav: - Home: README.md + - Components: components.md theme: name: readthedocs From 89d6bfcb1b4484e3fc463791fb5edb505e969135 Mon Sep 17 00:00:00 2001 From: Casey Marshall Date: Tue, 7 Nov 2023 18:42:38 -0600 Subject: [PATCH 3/4] chore: improve component tags section with examples Introduce the `component tags` sub command with the purpose of the command, followed by a progressive series of rule examples illustrating how the rules work and how to write them to address various use cases. 
--- docs/components.md | 188 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 150 insertions(+), 38 deletions(-) diff --git a/docs/components.md b/docs/components.md index c500124..cdf94e5 100644 --- a/docs/components.md +++ b/docs/components.md @@ -1,59 +1,136 @@ +# Component tags -### Defining software component tags for Snyk Insights +## What are component tags? -I want to add `component` tags on projects in my Snyk Organization for [Snyk Insights](https://docs.snyk.io/manage-issues/insights/insights-setup/insights-setup-associating-snyk-open-source-code-and-container-projects), based on rules which match and extract certain features of project and attributes. See section on [Component Tags](#component-tags-for-snyk-insights) below. +Component tags are used to describe how your software is composed, packaged and deployed, so that Snyk can relate security analysis findings to your cloud estate. -```bash -snyk-tags component tag --org-id=abc rules.yaml -``` +[Component tags](https://docs.snyk.io/manage-risk/insights/insights-setup/insights-setup-associating-snyk-open-source-code-and-container-projects) are used by [Snyk Insights](https://docs.snyk.io/manage-risk/insights) to associate related Snyk projects together, so that Snyk can provide contextual analysis of the actual risk when vulnerabilities are discovered in these software assets across the SDLC. -I want to preview component tag processing changes before applying them. +## How does `snyk-tags` help manage component tags? -```bash -snyk-tags component tag --dry-run rules.yaml -``` +For larger enterprise software estates, manually creating such tags, or even scripting them in CI/CD pipelines can be cumbersome and difficult to effectively govern at scale. When you have tens of thousands of SCM repositories and images, how do you manage all these definitions? -I want to remove all component tags, as determined by the same rules. 
+The `snyk-tags component` approach to managing this, is to use the naming conventions and structure which enterprises have already standardized on. With rules based on regular expressions, such conventions can usually be expressed declaratively and applied to bulk tag a large number of Snyk projects. -```bash -snyk-tags component tag --remove rules.yaml -``` +## How are component tag rules defined? -I want to replace _all_ component tags that might exist on matching projects with only those specified by the rules. +A `rules.yaml` file defines one or more _rules_. Each _rule_ matches one or more attributes of Snyk projects with exact values or a regular expression. _Named group captures_ in a regular expression can extract parts of these values into _variables_, which may be then referenced in a component tag expression which is evaluated and applied to the matching project. -```bash -snyk-tags component tag --exclusive rules.yaml +### Working with fixed project attribute values + +A very simple rule performs an exact match on the project name, and sets a fixed string value as the component tag: + +```yaml +version: 1 +rules: +- name: fixed-project-name-example + projects: + - name: my-example-project + component: pkg:github/my-org/my-example-project@main ``` -I want to remove _all_ component tags from matching projects. +The component tag follows a [Package URL](https://github.com/package-url/purl-spec) (pURL) convention. Some assumptions encoded into this pURL: -```bash -snyk-tags component tag --remove --exclusive rules.yaml +- The software component is identified by a Github repo `my-example-project`, owned by organization `my-org`. +- The `main` branch of this repository was tested. + +This component identifier is somewhat arbitrary, but a pURL is a useful reference to the component's source, and is also conventionally used in various SBOM formats. 
+ +### Regular expressions and variable capture + +Imagine you have 10,000 projects in your Github org, all named similarly, but with different substrings in the repo name. To use the above "fixed value" type of rule, you'd need to define 10,000 such rules to assign a component tag to each. + +Tagging such projects with a regular expression rule looks like this: + +```yaml +version: 1 +rules: +- name: regex-project-name-example + projects: + - name: + regex: '^my-org/(?P<project>\w+):' + component: pkg:github/my-org/{project}@main ``` -#### Formatting options +A few observations about this rule worth highlighting: -I want to store a CSV report of component tag rule processing to a file. +- The project `name` can either be a fixed string, or an object with a `regex` attribute, which defines a regular expression match and capture for the attribute. +- The regular expression is matching all project names that start with `my-org/`. +- The word following `my-org/` (`\w+` matches one or more alphanumeric characters) is captured to a variable named `project` (that's what the `?P<project>` does in the parenthesized group). +- The captured `project` variable is used in the component tag expression. -```bash -snyk-tags component tag --format csv rules.yaml | tee component-tags.csv +### Normalizing over multiple project naming conventions + +Unfortunately Snyk doesn't always name projects the same way. There are several reasons for this, but generally it's because projects (the subject of a Snyk test) are identified differently depending on what kind of test is being run. Snyk Container project names, for example, can reference an OCI image repository location, while Snyk Open Source and Snyk Code will generally reference source code repository locations. + +We can define multiple project pattern matchers within the same rule to normalize over these differences. Again, this is how we teach Snyk Insights "the subjects of these tests are all part of the same 'software component'". 
+ +```yaml +version: 1 +rules: +- name: regex-multiple-snyk-projects + projects: + - name: + regex: '^my-org/(?P<project>\w+):' + - name: + regex: '^(?P<project>\w+):' + origin: ecr + component: pkg:github/my-org/{project}@main ``` -I want to append a newline-delimited JSON (ndjson) log of component tag processing to a file. +Comparing with the previous example, we've added an additional project matcher. This matcher: + +- Extracts the first alphanumeric string from the name preceding a `:` as the `project` variable... +- ...but only when the project `origin` is `ecr`. + +Projects with `origin: ecr` indicate a container in an [Amazon Elastic Container Registry (ECR)](https://docs.aws.amazon.com/AmazonECR/latest/userguide/what-is-ecr.html) was tested with Snyk Container. + +Project matchers are evaluated on each project in the same order in which they are defined in the rule, until there is a match. The variables extracted from the match are used in the `component` expression to tag the project. If there is no match, the project is not tagged. + +Note that the same variables must be captured in each matcher, in order to reference them in the `component` tag expression. + +### Project attributes available for matching + +The following project attributes may be used in a project matcher: + +- `name` +- `origin` +- `target.display_name` +- `target.url` +- `target_reference` + +For best results when developing rules, explore the values of these attributes in your projects with the [Snyk REST API](https://apidocs.snyk.io). + +Matchers operate on objects which are simplified from Projects API responses. Only these fields are supported -- though note that not all projects set all of these fields. The matcher fields are shown below in YAML format, with commentary relating each to Projects REST API resources. -```bash -snyk-tags component tag --format json rules.yaml | tee -a component-tags.ndjson +``` +projects: +- name: '...' # from data.attributes.name + origin: '...' 
# from data.attributes.origin + target: + display_name: '...' # from relationships.target.data.attributes.display_name + url: '...' # from relationships.target.data.attributes.url + target_reference: '...' # from data.attributes.target_reference ``` -## Component tags for Snyk Insights +#### Target attributes -Part of the setup process for Snyk Insights involves associating Snyk Open Source, Code and Container projects together. For a large organization this can be a daunting task. The `snyk-tags component tag` command allows automating the application of such tags based on regular expression matching and extraction. +`target` sub-attributes are defined as a sub-object of the project. `target.url` for example, can be used in a rule like this: -This tool may be run up-front as part of onboarding, but also as a regular batch job. This allows component tags to be more centrally managed across an organization. +```yaml +version: 1 +rules: +- name: target-url-example + projects: + - target: + url: + regex: 'http(s)?://github.com/my-org/(?P<project>\w+).git' + component: pkg:github/my-org/{project}@main +``` -### Component rules +# Complete rules file example with commentary -The format for the rules file is as follows: +A `rules.yaml` can contain multiple rules. These are evaluated by `snyk-tags component` in the same order they are defined, on each project in a Snyk org. The first rule that matches a project is used to tag that project. ```yaml # 'version' is the version of this rules file format, currently 1 @@ -107,13 +184,48 @@ rules: component: 'pkg:github/my-retail-store/{service_name}@main' ``` -Matchers operate on objects which are simplified from Projects API responses. Only these fields are supported -- though note that not all projects set all of these fields. The fields are shown below in YAML format, commented with their mapping from Projects REST API resources. 
+## `snyk-tags component` command-line usage + +I want to add `component` tags on projects in my Snyk Organization for [Snyk Insights](https://docs.snyk.io/manage-issues/insights/insights-setup/insights-setup-associating-snyk-open-source-code-and-container-projects), based on rules which match and extract certain features of project and attributes. +```bash +snyk-tags component tag --org-id=abc rules.yaml ``` -- name: '...' # from data.attributes.name - origin: '...' # from data.attributes.origin - target: - display_name: '...' # from relationships.target.data.attributes.display_name - url: '...' # from relationships.target.data.attributes.url - target_reference: '...' # from data.attributes.target_reference + +I want to preview component tag processing changes before applying them. + +```bash +snyk-tags component tag --dry-run rules.yaml +``` + +I want to remove all component tags, as determined by the same rules. + +```bash +snyk-tags component tag --remove rules.yaml +``` + +I want to replace _all_ component tags that might exist on matching projects with only those specified by the rules. + +```bash +snyk-tags component tag --exclusive rules.yaml +``` + +I want to remove _all_ component tags from matching projects. + +```bash +snyk-tags component tag --remove --exclusive rules.yaml +``` + +#### Formatting options + +I want to store a CSV report of component tag rule processing to a file. + +```bash +snyk-tags component tag --format csv rules.yaml | tee component-tags.csv +``` + +I want to append a newline-delimited JSON (ndjson) log of component tag processing to a file. + +```bash +snyk-tags component tag --format json rules.yaml | tee -a component-tags.ndjson ``` From 6a69579b83c2f4cdca6e128664ddfbac1d178cba Mon Sep 17 00:00:00 2001 From: Casey Marshall Date: Mon, 22 Jan 2024 14:43:13 -0600 Subject: [PATCH 4/4] fix: project target matching Fixes an issue where related target attributes weren't available for matching. 
Credits to @thavelock for discovering and helping debug the issue. --- snyk_tags/component.py | 2 +- tests/test_component.py | 65 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/snyk_tags/component.py b/snyk_tags/component.py index 5ceb27c..b2b0192 100644 --- a/snyk_tags/component.py +++ b/snyk_tags/component.py @@ -194,7 +194,7 @@ def tag( # Clear context as this dict is (re)used in-place with each # execution of the project matcher rules. context.clear() - component = match_fn(project.get("attributes", {})) + component = match_fn(project_obj) if not component: # Rule did not match continue diff --git a/tests/test_component.py b/tests/test_component.py index 5e9f5a6..1e59df8 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -72,6 +72,71 @@ def test_component_tag_match_dry_run(tmpdir, httpx_mock): ) +def test_component_tag_match_target_dry_run(tmpdir, httpx_mock): + rules_file = tmpdir.join("rules.yaml") + rules_file.write( + """ +version: 1 +rules: + - name: test + projects: + - target: + url: + regex: '.*/(?P<org>\S+)/(?P<proj>\S+)$' + component: '{org}-{proj}-component' +""" + ) + httpx_mock.add_response( + method="GET", + url=re.compile("^.*/orgs/some-org/projects[?].*"), + json={ + "data": [ + { + "id": "some-project", + "attributes": { + "name": "test", + }, + "relationships": { + "target": { + "data": { + "attributes": { + "display_name": "some-org/java-goof", + "url": "https://github.com/some-org/java-goof", + }, + }, + }, + }, + }, + ], + }, + ) + httpx_mock.add_response( + method="POST", url=re.compile("^.*/org/some-org/project/some-project/tags$") + ) + httpx_mock.add_response( + status_code=400 + ) # catch-all response, otherwise backoff retry will block testing + + result = runner.invoke( + app, + [ + "component", + "tag", + "--org-id", + "some-org", + "--snyktkn", + "some-token", + "--dry-run", + str(rules_file), + ], + ) + assert result.exit_code == 0 + assert ( + """would add tag 
"component:some-org-java-goof-component" in project id="some-project" name="test\"""" + in result.stdout + ) + + def test_component_tag_match_added(tmpdir, httpx_mock): rules_file = tmpdir.join("rules.yaml") rules_file.write(