From faeb74bd967be463c262bf39b9fcea4306b7df9e Mon Sep 17 00:00:00 2001 From: Michal Domonkos Date: Sat, 23 Dec 2023 18:27:01 +0100 Subject: [PATCH] Optimize commit -> changeset resolution Turn this functionality into a Python function so that we don't have to spawn a separate Python subprocess to resolve each commit. This removes a major bottleneck in git-cherry-plan and git-changelog and reduces their runtime from a couple of seconds down to microseconds (as it should be, really). --- devel_doc/common.py | 75 +++++++++++++++++++++++++++++++++ devel_doc/git-changelog | 9 +--- devel_doc/git-changeset | 70 +++--------------------------- devel_doc/git-cherry-plan | 20 ++++----- devel_doc/release_maintaince.md | 16 +++---- 5 files changed, 99 insertions(+), 91 deletions(-) diff --git a/devel_doc/common.py b/devel_doc/common.py index e89cb22..ea51dc4 100644 --- a/devel_doc/common.py +++ b/devel_doc/common.py @@ -1,3 +1,5 @@ +import json +import os import re import subprocess import sys @@ -21,6 +23,77 @@ def backports(branch): res.extend(re.findall(r, log)) return res +def changeset(commit, refresh=False, quiet=False, split=True): + """Return the GitHub PR belonging to the given commit hash.""" + + # Set variables + out = [] + if len(commit) < SHA1_LEN: + commit = shell('git rev-parse {}'.format(commit)) + commit_dir = '{}/changeset/commits/{}'.format(GIT_DIR, commit[:2]) + commit_path = '{}/{}'.format(commit_dir, commit[2:]) + url = 'https://github.com/rpm-software-management/rpm/pull/{}' + if os.path.islink(commit_path): + data_path = os.path.abspath( + os.path.join(commit_dir, os.readlink(commit_path))) + else: + data_path = commit_path + + # Fetch and cache entry if needed + if refresh or not os.path.exists(data_path): + # Fetch data + if not quiet: + sys.stderr.write( + 'Fetching changeset for commit {}\n'.format(commit)) + data = shell('gh pr list --state merged --limit 1 --json ' + 'number,title,url,labels --search {}'.format(commit)) + data = json.loads(data) + os.makedirs(commit_dir, exist_ok=True) + if not data: + with open(commit_path, 'w') as f: + f.write('') + sys.exit(1) + assert len(data) >= 1 + data = data[0] + + # Create entry + number = data['number'] + title = data['title'] + labels = sorted(x['name'] for x in data['labels']) + if 'release' in labels: + entry = '' + else: + entry = '{}\n{}'.format(title, ' '.join(labels)) + number_dir = '{}/changeset/numbers'.format(GIT_DIR) + number_path = '{}/{}'.format(number_dir, number) + + # Write entry to cache + os.makedirs(number_dir, exist_ok=True) + with open(number_path, 'w') as f: + f.write(entry) + if os.path.exists(commit_path): + os.unlink(commit_path) + os.symlink('../../numbers/{}'.format(number), commit_path) + + # Return entry if cached + if os.path.islink(commit_path): + number = os.path.basename(os.readlink(commit_path)) + with open(commit_path) as f: + entry = list(map(str.strip, f.readlines())) + if not entry: + sys.exit(1) + out.append('PR #{}: {}'.format(number, entry[0])) + if len(entry) == 2: + out.append(entry[1]) + else: + out.append('') + out.append(url.format(number)) + + if not split: + out = '\n'.join(out) + + return out + class KeyType: SIMPLE = 0, @@ -65,7 +138,9 @@ def get(self, key, type=KeyType.SIMPLE, default=None): return d +GIT_DIR = shell('git rev-parse --path-format=absolute --git-common-dir') CONFIG = GitConfig('cherryPlan') BACKPORT_RE = ['^\\(cherry picked from commit (.*)\\)$'] + \ CONFIG.get('backportPatterns', KeyType.MULTILINE, []) BACKPORT_RE = [re.compile(r, re.MULTILINE) for r in BACKPORT_RE] +SHA1_LEN = 40 diff --git a/devel_doc/git-changelog b/devel_doc/git-changelog index aebec04..905fe6f 100755 --- a/devel_doc/git-changelog +++ b/devel_doc/git-changelog @@ -7,7 +7,7 @@ import re import subprocess import sys -from common import shell, KeyType, GitConfig, backports +from common import shell, backports, changeset, KeyType, GitConfig class PlainLog(object): @@ -108,12 +108,7 @@ def pulls(branch): ignore = set(IGNORE) for commit in backports(branch): - opts = '' - if args.refresh: - opts += '-r ' - if args.quiet: - opts += '-q ' - lines = shell('git changeset {} {}'.format(opts, commit), split=True) + lines = changeset(commit, args.refresh, args.quiet) if not lines: continue title = lines[0].split(':', 1)[1].strip() diff --git a/devel_doc/git-changeset b/devel_doc/git-changeset index eaa7cbe..0cf7e68 100755 --- a/devel_doc/git-changeset +++ b/devel_doc/git-changeset @@ -6,78 +6,18 @@ import os import subprocess import sys -from common import shell +from common import shell, changeset -# Parse args parser = argparse.ArgumentParser( - description='Print GitHub PR belonging to given refname.') -parser.add_argument('refname', type=str, help='git refname') + description='Print GitHub PR belonging to given commit.') +parser.add_argument('commit', type=str, help='git commit hash') parser.add_argument('-r', '--refresh', action='store_true', help='always fetch from server (do not use cache)') parser.add_argument('-q', '--quiet', action='store_true', help='do not print progress messages on stderr') args = parser.parse_args() -# Set variables -GIT_DIR = shell('git rev-parse --path-format=absolute --git-common-dir') -NUMBER_DIR = '{}/changeset/numbers'.format(GIT_DIR) -COMMIT_HASH = shell('git rev-parse {}'.format(args.refname)) -COMMIT_DIR = '{}/changeset/commits/{}'.format(GIT_DIR, COMMIT_HASH[:2]) -COMMIT_PATH = '{}/{}'.format(COMMIT_DIR, COMMIT_HASH[2:]) -URL = 'https://github.com/rpm-software-management/rpm/pull/{}' -if os.path.islink(COMMIT_PATH): - DATA_PATH = os.path.abspath(os.path.join(COMMIT_DIR, - os.readlink(COMMIT_PATH))) -else: - DATA_PATH = COMMIT_PATH -# Fetch and cache entry if needed -if args.refresh or not os.path.exists(DATA_PATH): - # Fetch data - if not args.quiet: - sys.stderr.write('Fetching changeset for commit {}\n'.format(COMMIT_HASH)) - data = shell('gh pr list --state merged --limit 1 --json ' - 'number,title,url,labels --search {}'.format(COMMIT_HASH)) - data = json.loads(data) - os.makedirs(COMMIT_DIR, exist_ok=True) - if not data: - with open(COMMIT_PATH, 'w') as f: - f.write('') - sys.exit(1) - assert len(data) >= 1 - data = data[0] - - # Create entry - number = data['number'] - title = data['title'] - labels = sorted(x['name'] for x in data['labels']) - if 'release' in labels: - entry = '' - else: - entry = '{}\n{}'.format(title, ' '.join(labels)) - number_path = '{}/{}'.format(NUMBER_DIR, number) - - # Write entry to cache - os.makedirs(NUMBER_DIR, exist_ok=True) - with open(number_path, 'w') as f: - f.write(entry) - if os.path.exists(COMMIT_PATH): - os.unlink(COMMIT_PATH) - os.symlink('../../numbers/{}'.format(number), COMMIT_PATH) - -# Print entry if cached -if os.path.islink(COMMIT_PATH): - number = os.path.basename(os.readlink(COMMIT_PATH)) - with open(COMMIT_PATH) as f: - entry = list(map(str.strip, f.readlines())) - if not entry: - sys.exit(1) - print('PR #{}: {}'.format(number, entry[0])) - if len(entry) == 2: - print(entry[1]) - else: - print() - print(URL.format(number)) -else: - sys.exit(1) +if __name__ == '__main__': + print(changeset(args.commit, args.refresh, args.quiet, split=False)) diff --git a/devel_doc/git-cherry-plan b/devel_doc/git-cherry-plan index eeace13..e6bc5d4 100755 --- a/devel_doc/git-cherry-plan +++ b/devel_doc/git-cherry-plan @@ -8,7 +8,7 @@ import shutil import subprocess import sys -from common import shell, KeyType, GitConfig, backports +from common import shell, backports, changeset, KeyType, GitConfig # @@ -40,7 +40,7 @@ class Commit(object): self.subject) class Changeset(object): - """A group of commits based on git-changeset.""" + """A group of commits forming a logical changeset.""" def __init__(self, title, labels, url, commits, show_labels=True): self.title = title @@ -201,18 +201,18 @@ class Plan(object): # Group commits based on a changeset (if any) - clines = shell('git changeset ' + commit.hash) + clines = changeset(commit.hash) if not clines: self.lines.append(commit) continue - changeset = Changeset(*clines.split('\n'), [], self.show_labels) + chset = Changeset(*clines, [], self.show_labels) if (isinstance(self.last, Changeset) and - self.last.title == changeset.title): - changeset = self.last + self.last.title == chset.title): + chset = self.last else: - self.lines.append(changeset) - changeset.commits.append(commit) + self.lines.append(chset) + chset.commits.append(commit) # Automark changesets if self.automark: @@ -343,7 +343,7 @@ with the following differences: * incremental (always applies to HEAD, no rewriting of history) * includes already applied commits for more context ("noop" marker) * supports the empty marker (to indicate unreviewed commits) - * groups commits by logical changesets (with git-changeset) + * groups commits by logical changesets (e.g. pull requests) git configuration (cherryPlan section): directory save plan files here if FILE is unspecified @@ -427,8 +427,6 @@ parser_update.set_defaults(verb=update) args = parser.parse_args() if hasattr(args, 'verb'): - if shutil.which('git-changeset') is None: - args.changeset_size = 0 plan = Plan(args.branch, args.changeset_size, args.show_labels, args.automark, args.add_timestamp) args.verb(plan, args) diff --git a/devel_doc/release_maintaince.md b/devel_doc/release_maintaince.md index 2eac8c8..e324325 100644 --- a/devel_doc/release_maintaince.md +++ b/devel_doc/release_maintaince.md @@ -59,7 +59,6 @@ Download the following utilities, put them into your `$PATH` and make them executable: * [git-cherry-plan](git-cherry-plan) -* [git-changeset](git-changeset) * [git-changelog](git-changelog) Also download the following file and place it alongside the above utilities: @@ -76,7 +75,7 @@ git config include.path /path/to/gitconfig ``` You will also need the [GitHub CLI utility](https://cli.github.com/) which is -required by `git-changeset`. On Fedora, get it with: +required for commit grouping. On Fedora, get it with: ``` dnf install gh @@ -98,10 +97,11 @@ chronological list of commits on master since the branching point, in a format similar to that of `git rebase -i`, and mark with `noop` those that have been cherry-picked already. -If the `git-changeset` utility is installed, commits will be grouped by their -originating GitHub pull requests. This may take a while on the first run since -the script needs to fetch the PR data from GitHub. The data is then cached -locally in the `.git/changeset/` directory and reused on next runs. +By default, commits will be grouped by their originating GitHub pull requests. +This may take a while on the first run since the script needs to fetch the PR +data from GitHub. The data is then cached locally in the `.git/changeset/` +directory and reused on next runs. This feature can be disabled with the `-s0` +switch. For complete usage help, run: @@ -209,8 +209,8 @@ If you use VIM, you can add [this](plan.vim) snippet into your `~/.vimrc` to cycle through markers on the current line with the `CTRL+SPACE` key and do a `git show` of the current commit with the `Enter` key. -If you also install the `git changeset` script mentioned above, you can type -`gx` to open the current commit's PR in your default browser. +If you install the [git-changeset](git-changeset) script into your `$PATH`, you +can type `gx` to open the current commit's PR in your default browser. ### Sharing a plan