Commit
introduce zombie_todos.py
teh-cmc committed Sep 22, 2023
1 parent 5acb22b commit 74e71d4
Showing 3 changed files with 124 additions and 0 deletions.
1 change: 1 addition & 0 deletions scripts/lint.py
@@ -625,6 +625,7 @@ def main() -> None:
"./crates/re_types_builder/src/reflection.rs", # auto-generated
"./examples/rust/objectron/src/objectron.rs", # auto-generated
"./scripts/lint.py", # we contain all the patterns we are linting against
"./scripts/zombie_todos.py",
"./web_viewer/re_viewer.js", # auto-generated by wasm_bindgen
"./web_viewer/re_viewer_debug.js", # auto-generated by wasm_bindgen
}
1 change: 1 addition & 0 deletions scripts/requirements-dev.txt
@@ -12,3 +12,4 @@ tqdm
requests
gitignore_parser # handle .gitignore
python-frontmatter==1.0.0
+aiohttp
122 changes: 122 additions & 0 deletions scripts/zombie_todos.py
@@ -0,0 +1,122 @@
#!/usr/bin/env python
from __future__ import annotations

import argparse
import asyncio
import os
import re

import aiohttp
from gitignore_parser import parse_gitignore

# ---

parser = argparse.ArgumentParser(description="Hunt down zombie TODOs.")
parser.add_argument("--token", dest="GITHUB_TOKEN", help="Github token to fetch issues", required=True)

args = parser.parse_args()

# --- Fetch issues from Github API ---

headers = {
    "Accept": "application/vnd.github+json",
    "Authorization": f"Bearer {args.GITHUB_TOKEN}",
    "X-GitHub-Api-Version": "2022-11-28",
}

issues = []

repo_owner = "rerun-io"
repo_name = "rerun"
issue_state = "closed"
per_page = 100
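# NOTE: 100 is the maximum page size the GitHub REST API allows.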


async def fetch_issue_page(session, page):
    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/issues?state={issue_state}&per_page={per_page}&page={page}"
    async with session.get(url, headers=headers) as response:
        if response.status != 200:
            print(f"Error: Failed to fetch issues from page {page}. Status code: {response.status}")
            return []
        data = await response.json()
        return [issue["number"] for issue in data]


async def fetch_total_number_of_issue_pages(session):
    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/issues?state={issue_state}&per_page={per_page}"
    async with session.get(url, headers=headers) as response:
        if response.status != 200:
            print(f"Error: Failed to fetch total pages. Status code: {response.status}")
            return None
        # GitHub advertises the final page in the `Link` response header,
        # e.g. `<https://api.github.com/...&page=42>; rel="last"`.
        link_header = response.headers.get("Link")
        if link_header:
            match = re.search(r'page=(\d+)>; rel="last"', link_header)
            if match:
                return int(match.group(1))
        return None


async def fetch_issues():
    async with aiohttp.ClientSession() as session:
        total_pages = await fetch_total_number_of_issue_pages(session)
        if total_pages is None:
            print("Failed to determine the number of pages.")
            return

        # Fetch all pages concurrently and flatten the results.
        tasks = [fetch_issue_page(session, page) for page in range(1, total_pages + 1)]
        issue_lists = await asyncio.gather(*tasks)
        issues.extend(issue for issue_list in issue_lists for issue in issue_list)


# --- Check files for zombie TODOs ---


internal_issue_number_pattern = re.compile(r"TODO\((?:#(\d+))(?:,\s*(?:#(\d+)))*\)")
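# Matches TODOs that reference issues by number, e.g. `TODO(#1234)` or
# `TODO(#1234, #5678)`. Note that Python's `re` keeps only the last repetition
# of a repeated capture group, so in a TODO with three or more issue numbers
# the middle ones are not captured.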


def check_file(path: str) -> None:
    closed_issues = set(issues)
    with open(path) as f:
        # `start=1` so the reported line numbers are 1-based.
        for i, line in enumerate(f.readlines(), start=1):
            matches = internal_issue_number_pattern.search(line)
            if matches is not None:
                for match in matches.groups():
                    if match is not None and int(match) in closed_issues:
                        print(f"{path}+{i}: {line.strip()}")


# ---


def main() -> None:
    asyncio.run(fetch_issues())

    script_dirpath = os.path.dirname(os.path.realpath(__file__))
    root_dirpath = os.path.abspath(f"{script_dirpath}/..")
    os.chdir(root_dirpath)

    extensions = ["c", "cpp", "fbs", "h", "hpp", "html", "js", "md", "py", "rs", "sh", "toml", "txt", "wgsl", "yml"]

    exclude_paths = {
        "./CODE_STYLE.md",
        "./scripts/lint.py",
        "./scripts/zombie_todos.py",
    }

    should_ignore = parse_gitignore(".gitignore")  # TODO(emilk): parse all .gitignore files, not just top-level

    for root, dirs, files in os.walk(".", topdown=True):
        # Prune ignored directories in-place so os.walk doesn't descend into them.
        dirs[:] = [d for d in dirs if not should_ignore(d)]

        for filename in files:
            extension = filename.split(".")[-1]
            if extension in extensions:
                filepath = os.path.join(root, filename)
                if should_ignore(filepath):
                    continue
                if filepath not in exclude_paths:
                    check_file(filepath)


if __name__ == "__main__":
    main()
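
For reference, a minimal way to run the new script locally (a sketch; it assumes a GitHub personal access token with permission to read issues on rerun-io/rerun, and `<YOUR_GITHUB_TOKEN>` is a placeholder):

    python scripts/zombie_todos.py --token <YOUR_GITHUB_TOKEN>

Any TODO(#1234)-style comment whose referenced issue is already closed is printed to stdout.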
