diff --git a/scripts/lint.py b/scripts/lint.py
index fb9c5b82bb265..0e18c2e44ab98 100755
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -625,6 +625,7 @@ def main() -> None:
         "./crates/re_types_builder/src/reflection.rs",  # auto-generated
         "./examples/rust/objectron/src/objectron.rs",  # auto-generated
         "./scripts/lint.py",  # we contain all the patterns we are linting against
+        "./scripts/zombie_todos.py",
         "./web_viewer/re_viewer.js",  # auto-generated by wasm_bindgen
         "./web_viewer/re_viewer_debug.js",  # auto-generated by wasm_bindgen
     }
diff --git a/scripts/requirements-dev.txt b/scripts/requirements-dev.txt
index 4be81c877cfd9..bef6b944e98f6 100644
--- a/scripts/requirements-dev.txt
+++ b/scripts/requirements-dev.txt
@@ -12,3 +12,4 @@ tqdm
 requests
 gitignore_parser # handle .gitignore
 python-frontmatter==1.0.0
+aiohttp
diff --git a/scripts/zombie_todos.py b/scripts/zombie_todos.py
new file mode 100755
index 0000000000000..4637190eef920
--- /dev/null
+++ b/scripts/zombie_todos.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+"""Hunt down "zombie" TODOs: TODO comments that reference already-closed GitHub issues."""
+from __future__ import annotations
+
+import argparse
+import asyncio
+import os
+import re
+
+import aiohttp
+from gitignore_parser import parse_gitignore
+
+# --- CLI ---
+
+parser = argparse.ArgumentParser(description="Hunt down zombie TODOs.")
+parser.add_argument("--token", dest="GITHUB_TOKEN", help="Github token to fetch issues", required=True)
+
+args = parser.parse_args()
+
+# --- Fetch issues from Github API ---
+
+headers = {
+    "Accept": "application/vnd.github+json",
+    "Authorization": f"Bearer {args.GITHUB_TOKEN}",
+    "X-GitHub-Api-Version": "2022-11-28",
+}
+
+# Closed issue numbers, populated by `fetch_issues()`. A set gives O(1) membership
+# tests in `check_file` without rebuilding it for every scanned file.
+issues: set[int] = set()
+
+repo_owner = "rerun-io"
+repo_name = "rerun"
+issue_state = "closed"
+per_page = 100
+
+
+async def fetch_issue_page(session: aiohttp.ClientSession, page: int) -> list[int]:
+    """Fetch one page of closed-issue numbers; returns [] on HTTP errors."""
+    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/issues?state={issue_state}&per_page={per_page}&page={page}"
+    async with session.get(url, headers=headers) as response:
+        if response.status != 200:
+            print(f"Error: Failed to fetch issues from page {page}. Status code: {response.status}")
+            return []
+        data = await response.json()
+        return [issue["number"] for issue in data]
+
+
+async def fetch_total_number_of_issue_pages(session: aiohttp.ClientSession) -> int | None:
+    """Determine the number of result pages from the `Link` response header, or None on failure."""
+    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/issues?state={issue_state}&per_page={per_page}"
+    async with session.get(url, headers=headers) as response:
+        if response.status != 200:
+            print(f"Error: Failed to fetch total pages. Status code: {response.status}")
+            return None
+        link_header = response.headers.get("Link")
+        if link_header:
+            match = re.search(r'page=(\d+)>; rel="last"', link_header)
+            if match:
+                return int(match.group(1))
+        return None
+
+
+async def fetch_issues() -> None:
+    """Populate the global `issues` set with all closed issue numbers, fetching pages concurrently."""
+    async with aiohttp.ClientSession() as session:
+        total_pages = await fetch_total_number_of_issue_pages(session)
+        if total_pages is None:
+            print("Failed to determine the number of pages.")
+            return
+
+        tasks = [fetch_issue_page(session, page) for page in range(1, total_pages + 1)]
+        issue_lists = await asyncio.gather(*tasks)
+        issues.update(issue for issue_list in issue_lists for issue in issue_list)
+
+
+# --- Check files for zombie TODOs ---
+
+# Match `TODO(...)` and extract the issue numbers separately. A repeated capture
+# group (the previous approach) only keeps the *last* repetition, silently
+# skipping the middle issues of e.g. `TODO(#1, #2, #3)`.
+todo_pattern = re.compile(r"TODO\(([^)]*)\)")
+issue_number_pattern = re.compile(r"#(\d+)")
+
+
+def check_file(path: str) -> None:
+    """Print every line of `path` containing a TODO that references a closed issue."""
+    with open(path, encoding="utf8") as f:
+        for line_nr, line in enumerate(f, start=1):  # 1-based, like editors show
+            for todo in todo_pattern.finditer(line):  # all TODOs on the line, not just the first
+                for issue_nr in issue_number_pattern.findall(todo.group(1)):
+                    if int(issue_nr) in issues:
+                        print(f"{path}+{line_nr}: {line.strip()}")
+
+
+# --- Entry point ---
+
+
+def main() -> None:
+    asyncio.run(fetch_issues())
+
+    # Scan from the repository root, regardless of where the script is invoked.
+    script_dirpath = os.path.dirname(os.path.realpath(__file__))
+    root_dirpath = os.path.abspath(f"{script_dirpath}/..")
+    os.chdir(root_dirpath)
+
+    extensions = ["c", "cpp", "fbs", "h", "hpp", "html", "js", "md", "py", "rs", "sh", "toml", "txt", "wgsl", "yml"]
+
+    exclude_paths = {
+        "./CODE_STYLE.md",
+        "./scripts/lint.py",
+        "./scripts/zombie_todos.py",
+    }
+
+    should_ignore = parse_gitignore(".gitignore")  # TODO(emilk): parse all .gitignore files, not just top-level
+
+    for root, dirs, files in os.walk(".", topdown=True):
+        dirs[:] = [d for d in dirs if not should_ignore(d)]
+
+        for filename in files:
+            extension = filename.split(".")[-1]
+            if extension in extensions:
+                filepath = os.path.join(root, filename)
+                if should_ignore(filepath):
+                    continue
+                if filepath not in exclude_paths:
+                    check_file(filepath)
+
+
+if __name__ == "__main__":
+    main()