From 7bbdd9db406d3e5960a0d374ff451e05e0da2a5e Mon Sep 17 00:00:00 2001 From: Jon Massey Date: Tue, 21 May 2024 17:27:45 +0100 Subject: [PATCH] Use Repo type for repo field and fully populate For consistency with other dataclasses declared in github.py, use the Repo type for a Codespace's repo attribute. At the time of writing, the organisation codespaces API endpoint does not provide all the data required for this type, and since we are retrieving info on codespaces for non-Tech team repos/orgs, extra API calls are required to get the required information (teams, repos, repo metadata) --- INSTALL.md | 4 ++-- metrics/github/github.py | 30 +++++++++++++++++++---------- metrics/github/metrics.py | 4 ++-- metrics/github/query.py | 23 +++++++++++++++++++++- tests/metrics/github/test_github.py | 26 +++++++++++++++++++++++-- 5 files changed, 70 insertions(+), 17 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 761a879..91d29c8 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -25,9 +25,9 @@ Code scanning alerts, Dependabot alerts, Metadata, Pull requests and Repository The `GITHUB_OS_TOKEN` is a fine-grained GitHub personal access token that is used for authenticating with the GitHub REST API. It is assigned to a single organisation and should have the following *read-only* permissions: -* organisation permissions: codespaces +* organisation permissions: Organisation codespaces and Members * *all repositories* owned by the organisation with the following permissions: -Codespaces and Metadata +Codespaces, Metadata, and Repository security advisories ## Disable checks Dokku performs health checks on apps during deploy by sending requests to port 80. 
diff --git a/metrics/github/github.py b/metrics/github/github.py index 08270a1..9651206 100644 --- a/metrics/github/github.py +++ b/metrics/github/github.py @@ -110,26 +110,36 @@ def from_dict(cls, data, repo): @dataclass(frozen=True) class Codespace: - org: str - # The Repo type requires fields neither returned by the codespaces - # endpoint, nor required for codespaces metrics so str for repo name - repo: str + repo: Repo user: str created_at: datetime.datetime last_used_at: datetime.datetime @classmethod - def from_dict(cls, **kwargs): - return cls(**kwargs) + def from_dict(cls, repo, **kwargs): + if "repo_name" in kwargs: + del kwargs["repo_name"] + return cls(repo, **kwargs) def codespaces(org): + org_teams = teams(org) + ownership = _repo_owners(org, org_teams) + repos = { + r["name"]: Repo.from_dict(r, org=org, team=ownership.get(r["name"], None)) + for r in query.repos(org) + } + return [ - Codespace.from_dict(**({"org": org} | codespace)) - for codespace in query.codespaces(org) + Codespace.from_dict(repo=repos[c["repo_name"]], **c) + for c in query.codespaces(org) ] +def teams(org): + return [t["name"] for t in query.teams(org)] + + def tech_prs(): tech_team_members = _tech_team_members() return [ @@ -165,8 +175,8 @@ def _get_repos(): return repos -def _repo_owners(org): - return {repo: team for team in _TECH_TEAMS for repo in query.team_repos(org, team)} +def _repo_owners(org, teams=_TECH_TEAMS): + return {repo: team for team in teams for repo in query.team_repos(org, team)} def _tech_team_members(): diff --git a/metrics/github/metrics.py b/metrics/github/metrics.py index b660bbc..cfcfa66 100644 --- a/metrics/github/metrics.py +++ b/metrics/github/metrics.py @@ -81,8 +81,8 @@ def get_codespaces_metrics(codespaces): return [ { "created_at": c.created_at, - "organisation": c.org, - "repo": c.repo, + "organisation": c.repo.org, + "repo": c.repo.name, "user": c.user, "last_used_at": c.last_used_at, } diff --git a/metrics/github/query.py 
b/metrics/github/query.py index 8cb7061..3f13a11 100644 --- a/metrics/github/query.py +++ b/metrics/github/query.py @@ -45,6 +45,27 @@ def team_members(org, team): yield member["login"] +def teams(org): + query = """ + query teams($cursor:String, $org: String!) { + organization(login: $org) { + teams(first:100, after: $cursor) { + nodes { + name + } + pageInfo{ + endCursor + hasNextPage + } + } + } + } + """ + return maybe_truncate( + _client().graphql_query(query, path=["organization", "teams"], org=org) + ) + + def vulnerabilities(org, repo): query = """ query vulnerabilities($cursor: String, $org: String!, $repo: String!) { @@ -146,7 +167,7 @@ def codespaces(org): for codespace in codespaces: yield { "user": codespace["owner"]["login"], - "repo": codespace["repository"]["name"], + "repo_name": codespace["repository"]["name"], "created_at": codespace["created_at"], "last_used_at": codespace["last_used_at"], } diff --git a/tests/metrics/github/test_github.py b/tests/metrics/github/test_github.py index 18d6924..e4ed5c6 100644 --- a/tests/metrics/github/test_github.py +++ b/tests/metrics/github/test_github.py @@ -42,14 +42,36 @@ def test_codespaces(patch): "opensafely": [ { "user": "testuser", - "repo": "testrepo", + "repo_name": "testrepo", + "created_at": datetime.datetime.now().isoformat(), + "last_used_at": datetime.datetime.now().isoformat(), + }, + { + "user": "testuser", + "repo_name": "testrepo2", "created_at": datetime.datetime.now().isoformat(), "last_used_at": datetime.datetime.now().isoformat(), }, ] }, ) - assert len(github.codespaces("opensafely")) == 1 + patch("teams", {"opensafely": [{"name": "testteam"}]}) + # Expect that at least one repo will not be assigned to a team + patch("team_repos", {"opensafely": {"testteam": ["testrepo"]}}) + # Expect that there will be repositories without codespaces + patch( + "repos", + { + "opensafely": [ + repo_data("testrepo"), + repo_data("testrepo2"), + repo_data("testrepo3"), + ] + }, + ) + + # All codespaces for 
repos in this org, regardless of team should be counted + assert len(github.codespaces("opensafely")) == 2 def test_includes_tech_owned_repos(patch):