From 02da2438af97cc01290bba072e379bd77f9aaa75 Mon Sep 17 00:00:00 2001 From: Sandy Carter Date: Sun, 19 May 2024 22:34:44 -0400 Subject: [PATCH] Use task queue to spawn multiple processes of tidy --- action.yml | 5 + .../clang_tidy_review/__init__.py | 193 +++++++++++++----- .../clang_tidy_review/review.py | 8 + tests/test_review.py | 14 +- 4 files changed, 160 insertions(+), 60 deletions(-) diff --git a/action.yml b/action.yml index 0b864bd..df59943 100644 --- a/action.yml +++ b/action.yml @@ -61,6 +61,10 @@ inputs: description: "Use annotations instead of comments. See README for limitations on annotations" required: false default: false + parallel: + description: "Number of tidy instances to be run in parallel. Zero will automatically determine the right number." + required: false + default: "0" pr: default: ${{ github.event.pull_request.number }} repo: @@ -88,3 +92,4 @@ runs: - --lgtm-comment-body='${{ inputs.lgtm_comment_body }}' - --split_workflow=${{ inputs.split_workflow }} - --annotations=${{ inputs.annotations }} + - --parallel=${{ inputs.parallel }} diff --git a/post/clang_tidy_review/clang_tidy_review/__init__.py b/post/clang_tidy_review/clang_tidy_review/__init__.py index fca4ded..ae7774a 100644 --- a/post/clang_tidy_review/clang_tidy_review/__init__.py +++ b/post/clang_tidy_review/clang_tidy_review/__init__.py @@ -9,7 +9,14 @@ import glob import itertools import json +import multiprocessing import os +import queue +import shutil +import sys +import tempfile +import threading +import traceback from operator import itemgetter import pprint import pathlib @@ -161,50 +168,48 @@ def get_auth_from_arguments(args: argparse.Namespace) -> Auth: def build_clang_tidy_warnings( - line_filter, - build_dir, - clang_tidy_checks, - clang_tidy_binary: pathlib.Path, - config_file, - files, - username: str, + base_invocation: List, + env: dict, + tmpdir: str, + task_queue: queue.Queue, + lock: threading.Lock, + failed_files: List, ) -> None: - """Run clang-tidy on the given files and save output into FIXES_FILE""" + """Run clang-tidy on the given files and save output into a temporary file""" - config = config_file_or_checks(clang_tidy_binary, clang_tidy_checks, config_file) + while True: + name = task_queue.get() + invocation = base_invocation[:] - args = [ - clang_tidy_binary, - f"-p={build_dir}", - f"-line-filter={line_filter}", - f"--export-fixes={FIXES_FILE}", - "--enable-check-profile", - f"-store-check-profile={PROFILE_DIR}", - ] + # Get a temporary file. We immediately close the handle so clang-tidy can + # overwrite it. + (handle, fixes_file) = tempfile.mkstemp(suffix=".yaml", dir=tmpdir) + os.close(handle) + invocation.append(f"--export-fixes={fixes_file}") - if config: - print(f"Using config: {config}") - args.append(config) - else: - print("Using recursive directory config") + invocation.append(name) - args += files - - try: - with message_group(f"Running:\n\t{args}"): - env = dict(os.environ) - env["USER"] = username - subprocess.run( - args, - capture_output=True, - check=True, - encoding="utf-8", - env=env, - ) - except subprocess.CalledProcessError as e: - print( - f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}" + proc = subprocess.Popen( + invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env ) + output, err = proc.communicate() + end = datetime.datetime.now() + + if proc.returncode != 0: + if proc.returncode < 0: + msg = f"{name}: terminated by signal {-proc.returncode}\n" + err += msg.encode("utf-8") + failed_files.append(name) + with lock: + subprocess.list2cmdline(invocation) + sys.stdout.write( + f'{name}: {subprocess.list2cmdline(invocation)}\n{output.decode("utf-8")}' + ) + if len(err) > 0: + sys.stdout.flush() + sys.stderr.write(err.decode("utf-8")) + + task_queue.task_done() def clang_tidy_version(clang_tidy_binary: pathlib.Path): @@ -250,11 +255,33 @@ def config_file_or_checks( return "--config" -def load_clang_tidy_warnings(): - """Read clang-tidy warnings from FIXES_FILE. Can be produced by build_clang_tidy_warnings""" +def merge_replacement_files(tmpdir: str, mergefile: str): + """Merge all replacement files in a directory into a single file""" + # The fixes suggested by clang-tidy >= 4.0.0 are given under + # the top level key 'Diagnostics' in the output yaml files + mergekey = "Diagnostics" + merged = [] + for replacefile in glob.iglob(os.path.join(tmpdir, "*.yaml")): + content = yaml.safe_load(open(replacefile, "r")) + if not content: + continue # Skip empty files. + merged.extend(content.get(mergekey, [])) + + if merged: + # MainSourceFile: The key is required by the definition inside + # include/clang/Tooling/ReplacementsYaml.h, but the value + # is actually never used inside clang-apply-replacements, + # so we set it to '' here. + output = {"MainSourceFile": "", mergekey: merged} + with open(mergefile, "w") as out: + yaml.safe_dump(output, out) + + +def load_clang_tidy_warnings(fixes_file) -> Dict: + """Read clang-tidy warnings from fixes_file. Can be produced by build_clang_tidy_warnings""" try: - with open(FIXES_FILE, "r") as fixes_file: - return yaml.safe_load(fixes_file) + with open(fixes_file, "r") as file: + return yaml.safe_load(file) except FileNotFoundError: return {} @@ -824,7 +851,9 @@ def create_review_file( return review -def make_timing_summary(clang_tidy_profiling: Dict, sha: Optional[str] = None) -> str: +def make_timing_summary( + clang_tidy_profiling: Dict, real_time: datetime.timedelta, sha: Optional[str] = None +) -> str: if not clang_tidy_profiling: return "" top_amount = 10 @@ -901,7 +930,9 @@ def make_timing_summary(clang_tidy_profiling: Dict, sha: Optional[str] = None) - c = decorate_check_names(f"[{c}]").replace("[[", "[").rstrip("]") check_summary += f"|{c}|{u:.2f}|{s:.2f}|{w:.2f}|\n" - return f"## Timing\n{file_summary}{check_summary}" + return ( + f"## Timing\nReal time: {real_time.seconds:.2f}\n{file_summary}{check_summary}" + ) def filter_files(diff, include: List[str], exclude: List[str]) -> List: @@ -923,6 +954,7 @@ def create_review( clang_tidy_checks: str, clang_tidy_binary: pathlib.Path, config_file: str, + max_task: int, include: List[str], exclude: List[str], ) -> Optional[PRReview]: @@ -931,6 +963,9 @@ def create_review( """ + if max_task == 0: + max_task = multiprocessing.cpu_count() + diff = pull_request.get_pr_diff() print(f"\nDiff from GitHub PR:\n{diff}\n") @@ -970,18 +1005,68 @@ def create_review( username = pull_request.get_pr_author() or "your name here" # Run clang-tidy with the configured parameters and produce the CLANG_TIDY_FIXES file - build_clang_tidy_warnings( - line_ranges, - build_dir, - clang_tidy_checks, + return_code = 0 + export_fixes_dir = tempfile.mkdtemp() + env = dict(os.environ, USER=username) + config = config_file_or_checks(clang_tidy_binary, clang_tidy_checks, config_file) + base_invocation = [ clang_tidy_binary, - config_file, - files, - username, - ) + f"-p={build_dir}", + f"-line-filter={line_ranges}", + "--enable-check-profile", + f"-store-check-profile={PROFILE_DIR}", + ] + if config: + print(f"Using config: {config}") + base_invocation.append(config) + else: + print("Using recursive directory config") + + print(f"Spawning a task queue with {max_task} processes") + start = datetime.datetime.now() + try: + # Spin up a bunch of tidy-launching threads. + task_queue = queue.Queue(max_task) + # List of files with a non-zero return code. + failed_files = [] + lock = threading.Lock() + for _ in range(max_task): + t = threading.Thread( + target=build_clang_tidy_warnings, + args=( + base_invocation, + env, + export_fixes_dir, + task_queue, + lock, + failed_files, + ), + ) + t.daemon = True + t.start() + + # Fill the queue with files. + for name in files: + task_queue.put(name) + + # Wait for all threads to be done. + task_queue.join() + if len(failed_files): + return_code = 1 + + except KeyboardInterrupt: + # This is a sad hack. Unfortunately subprocess goes + # bonkers with ctrl-c and we start forking merrily. + print("\nCtrl-C detected, goodbye.") + os.kill(0, 9) + raise + real_duration = datetime.datetime.now() - start # Read and parse the CLANG_TIDY_FIXES file - clang_tidy_warnings = load_clang_tidy_warnings() + print("Writing fixes to " + FIXES_FILE + " ...") + merge_replacement_files(export_fixes_dir, FIXES_FILE) + shutil.rmtree(export_fixes_dir) + clang_tidy_warnings = load_clang_tidy_warnings(FIXES_FILE) # Read and parse the timing data clang_tidy_profiling = load_and_merge_profiling() @@ -992,7 +1077,7 @@ def create_review( sha = os.environ.get("GITHUB_SHA") # Post to the action job summary - step_summary = make_timing_summary(clang_tidy_profiling, sha) + step_summary = make_timing_summary(clang_tidy_profiling, real_duration, sha) set_summary(step_summary) print("clang-tidy had the following warnings:\n", clang_tidy_warnings, flush=True) diff --git a/post/clang_tidy_review/clang_tidy_review/review.py b/post/clang_tidy_review/clang_tidy_review/review.py index dbb76f7..0d1d165 100755 --- a/post/clang_tidy_review/clang_tidy_review/review.py +++ b/post/clang_tidy_review/clang_tidy_review/review.py @@ -112,6 +112,13 @@ def main(): type=bool_argument, default=False, ) + parser.add_argument( + "-j", + "--parallel", + help="Number of tidy instances to be run in parallel.", + type=int, + default=0, + ) parser.add_argument( "--dry-run", help="Run and generate review, but don't post", action="store_true" ) @@ -157,6 +164,7 @@ def main(): args.clang_tidy_checks, args.clang_tidy_binary, args.config_file, + args.parallel, include, exclude, ) diff --git a/tests/test_review.py b/tests/test_review.py index 9fa1527..781aaed 100644 --- a/tests/test_review.py +++ b/tests/test_review.py @@ -1,3 +1,5 @@ +import datetime + import clang_tidy_review as ctr import difflib @@ -235,10 +237,10 @@ def test_line_ranges(): assert line_ranges == expected_line_ranges -def test_load_clang_tidy_warnings(monkeypatch): - monkeypatch.setattr(ctr, "FIXES_FILE", str(TEST_DIR / f"src/test_{ctr.FIXES_FILE}")) - - warnings = ctr.load_clang_tidy_warnings() +def test_load_clang_tidy_warnings(): + warnings = ctr.load_clang_tidy_warnings( + str(TEST_DIR / f"src/test_{ctr.FIXES_FILE}") + ) assert sorted(list(warnings.keys())) == ["Diagnostics", "MainSourceFile"] assert warnings["MainSourceFile"] == "/clang_tidy_review/src/hello.cxx" @@ -470,5 +472,5 @@ def test_timing_summary(monkeypatch): assert "time.clang-tidy.total.wall" in profiling["hello.cxx"].keys() assert "time.clang-tidy.total.user" in profiling["hello.cxx"].keys() assert "time.clang-tidy.total.sys" in profiling["hello.cxx"].keys() - summary = ctr.make_timing_summary(profiling) - assert len(summary.split("\n")) == 21 + summary = ctr.make_timing_summary(profiling, datetime.timedelta(seconds=42)) + assert len(summary.split("\n")) == 22