Skip to content

Commit

Permalink
Optimize commit -> changeset resolution
Browse files Browse the repository at this point in the history
Turn this functionality into a Python function so that we don't have to
spawn a separate Python subprocess to resolve each commit.

This removes a major bottleneck in git-cherry-plan and git-changelog and
reduces their runtime from a couple of seconds down to microseconds (as
it should be, really).
  • Loading branch information
dmnks committed Dec 23, 2023
1 parent 803e55e commit faeb74b
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 91 deletions.
75 changes: 75 additions & 0 deletions devel_doc/common.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json
import os
import re
import subprocess
import sys
Expand All @@ -21,6 +23,77 @@ def backports(branch):
res.extend(re.findall(r, log))
return res

def changeset(commit, refresh=False, quiet=False, split=True):
"""Return the GitHub PR belonging to the given commit hash."""

# Set variables
out = []
if len(commit) < SHA1_LEN:
commit = shell('git rev-parse {}'.format(commit))
commit_dir = '{}/changeset/commits/{}'.format(GIT_DIR, commit[:2])
commit_path = '{}/{}'.format(commit_dir, commit[2:])
url = 'https://github.com/rpm-software-management/rpm/pull/{}'
if os.path.islink(commit_path):
data_path = os.path.abspath(
os.path.join(commit_dir, os.readlink(commit_path)))
else:
data_path = commit_path

# Fetch and cache entry if needed
if refresh or not os.path.exists(data_path):
# Fetch data
if not quiet:
sys.stderr.write(
'Fetching changeset for commit {}\n'.format(commit))
data = shell('gh pr list --state merged --limit 1 --json '
'number,title,url,labels --search {}'.format(commit))
data = json.loads(data)
os.makedirs(commit_dir, exist_ok=True)
if not data:
with open(commit_path, 'w') as f:
f.write('')
sys.exit(1)
assert len(data) >= 1
data = data[0]

# Create entry
number = data['number']
title = data['title']
labels = sorted(x['name'] for x in data['labels'])
if 'release' in labels:
entry = ''
else:
entry = '{}\n{}'.format(title, ' '.join(labels))
number_dir = '{}/changeset/numbers'.format(GIT_DIR)
number_path = '{}/{}'.format(number_dir, number)

# Write entry to cache
os.makedirs(number_dir, exist_ok=True)
with open(number_path, 'w') as f:
f.write(entry)
if os.path.exists(commit_path):
os.unlink(commit_path)
os.symlink('../../numbers/{}'.format(number), commit_path)

# Return entry if cached
if os.path.islink(commit_path):
number = os.path.basename(os.readlink(commit_path))
with open(commit_path) as f:
entry = list(map(str.strip, f.readlines()))
if not entry:
sys.exit(1)
out.append('PR #{}: {}'.format(number, entry[0]))
if len(entry) == 2:
out.append(entry[1])
else:
out.append('')
out.append(url.format(number))

if not split:
out = '\n'.join(out)

return out


class KeyType:
SIMPLE = 0,
Expand Down Expand Up @@ -65,7 +138,9 @@ def get(self, key, type=KeyType.SIMPLE, default=None):
return d


GIT_DIR = shell('git rev-parse --path-format=absolute --git-common-dir')
CONFIG = GitConfig('cherryPlan')
BACKPORT_RE = ['^\\(cherry picked from commit (.*)\\)$'] + \
CONFIG.get('backportPatterns', KeyType.MULTILINE, [])
BACKPORT_RE = [re.compile(r, re.MULTILINE) for r in BACKPORT_RE]
SHA1_LEN = 40
9 changes: 2 additions & 7 deletions devel_doc/git-changelog
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import re
import subprocess
import sys

from common import shell, KeyType, GitConfig, backports
from common import shell, backports, changeset, KeyType, GitConfig


class PlainLog(object):
Expand Down Expand Up @@ -108,12 +108,7 @@ def pulls(branch):
ignore = set(IGNORE)

for commit in backports(branch):
opts = ''
if args.refresh:
opts += '-r '
if args.quiet:
opts += '-q '
lines = shell('git changeset {} {}'.format(opts, commit), split=True)
lines = changeset(commit, args.refresh, args.quiet)
if not lines:
continue
title = lines[0].split(':', 1)[1].strip()
Expand Down
70 changes: 5 additions & 65 deletions devel_doc/git-changeset
Original file line number Diff line number Diff line change
Expand Up @@ -6,78 +6,18 @@ import os
import subprocess
import sys

from common import shell
from common import shell, changeset


# Parse args
parser = argparse.ArgumentParser(
description='Print GitHub PR belonging to given refname.')
parser.add_argument('refname', type=str, help='git refname')
description='Print GitHub PR belonging to given commit.')
parser.add_argument('commit', type=str, help='git commit hash')
parser.add_argument('-r', '--refresh', action='store_true',
help='always fetch from server (do not use cache)')
parser.add_argument('-q', '--quiet', action='store_true',
help='do not print progress messages on stderr')
args = parser.parse_args()

# Set variables
GIT_DIR = shell('git rev-parse --path-format=absolute --git-common-dir')
NUMBER_DIR = '{}/changeset/numbers'.format(GIT_DIR)
COMMIT_HASH = shell('git rev-parse {}'.format(args.refname))
COMMIT_DIR = '{}/changeset/commits/{}'.format(GIT_DIR, COMMIT_HASH[:2])
COMMIT_PATH = '{}/{}'.format(COMMIT_DIR, COMMIT_HASH[2:])
URL = 'https://github.com/rpm-software-management/rpm/pull/{}'
if os.path.islink(COMMIT_PATH):
DATA_PATH = os.path.abspath(os.path.join(COMMIT_DIR,
os.readlink(COMMIT_PATH)))
else:
DATA_PATH = COMMIT_PATH

# Fetch and cache entry if needed
if args.refresh or not os.path.exists(DATA_PATH):
# Fetch data
if not args.quiet:
sys.stderr.write('Fetching changeset for commit {}\n'.format(COMMIT_HASH))
data = shell('gh pr list --state merged --limit 1 --json '
'number,title,url,labels --search {}'.format(COMMIT_HASH))
data = json.loads(data)
os.makedirs(COMMIT_DIR, exist_ok=True)
if not data:
with open(COMMIT_PATH, 'w') as f:
f.write('')
sys.exit(1)
assert len(data) >= 1
data = data[0]

# Create entry
number = data['number']
title = data['title']
labels = sorted(x['name'] for x in data['labels'])
if 'release' in labels:
entry = ''
else:
entry = '{}\n{}'.format(title, ' '.join(labels))
number_path = '{}/{}'.format(NUMBER_DIR, number)

# Write entry to cache
os.makedirs(NUMBER_DIR, exist_ok=True)
with open(number_path, 'w') as f:
f.write(entry)
if os.path.exists(COMMIT_PATH):
os.unlink(COMMIT_PATH)
os.symlink('../../numbers/{}'.format(number), COMMIT_PATH)

# Print entry if cached
if os.path.islink(COMMIT_PATH):
number = os.path.basename(os.readlink(COMMIT_PATH))
with open(COMMIT_PATH) as f:
entry = list(map(str.strip, f.readlines()))
if not entry:
sys.exit(1)
print('PR #{}: {}'.format(number, entry[0]))
if len(entry) == 2:
print(entry[1])
else:
print()
print(URL.format(number))
else:
sys.exit(1)
if __name__ == '__main__':
print(changeset(args.commit, args.refresh, args.quiet, split=False))
20 changes: 9 additions & 11 deletions devel_doc/git-cherry-plan
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import shutil
import subprocess
import sys

from common import shell, KeyType, GitConfig, backports
from common import shell, backports, changeset, KeyType, GitConfig


#
Expand Down Expand Up @@ -40,7 +40,7 @@ class Commit(object):
self.subject)

class Changeset(object):
"""A group of commits based on git-changeset."""
"""A group of commits forming a logical changeset."""

def __init__(self, title, labels, url, commits, show_labels=True):
self.title = title
Expand Down Expand Up @@ -201,18 +201,18 @@ class Plan(object):

# Group commits based on a changeset (if any)

clines = shell('git changeset ' + commit.hash)
clines = changeset(commit.hash)
if not clines:
self.lines.append(commit)
continue

changeset = Changeset(*clines.split('\n'), [], self.show_labels)
chset = Changeset(*clines, [], self.show_labels)
if (isinstance(self.last, Changeset) and
self.last.title == changeset.title):
changeset = self.last
self.last.title == chset.title):
chset = self.last
else:
self.lines.append(changeset)
changeset.commits.append(commit)
self.lines.append(chset)
chset.commits.append(commit)

# Automark changesets
if self.automark:
Expand Down Expand Up @@ -343,7 +343,7 @@ with the following differences:
* incremental (always applies to HEAD, no rewriting of history)
* includes already applied commits for more context ("noop" marker)
* supports the empty marker (to indicate unreviewed commits)
* groups commits by logical changesets (with git-changeset)
* groups commits by logical changesets (e.g. pull requests)
git configuration (cherryPlan section):
directory save plan files here if FILE is unspecified
Expand Down Expand Up @@ -427,8 +427,6 @@ parser_update.set_defaults(verb=update)

args = parser.parse_args()
if hasattr(args, 'verb'):
if shutil.which('git-changeset') is None:
args.changeset_size = 0
plan = Plan(args.branch, args.changeset_size, args.show_labels,
args.automark, args.add_timestamp)
args.verb(plan, args)
Expand Down
16 changes: 8 additions & 8 deletions devel_doc/release_maintaince.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ Download the following utilities, put them into your `$PATH` and make them
executable:

* [git-cherry-plan](git-cherry-plan)
* [git-changeset](git-changeset)
* [git-changelog](git-changelog)

Also download the following file and place it alongside the above utilities:
Expand All @@ -76,7 +75,7 @@ git config include.path /path/to/gitconfig
```

You will also need the [GitHub CLI utility](https://cli.github.com/) which is
required by `git-changeset`. On Fedora, get it with:
required for commit grouping. On Fedora, get it with:

```
dnf install gh
Expand All @@ -98,10 +97,11 @@ chronological list of commits on master since the branching point, in a format
similar to that of `git rebase -i`, and mark with `noop` those that have been
cherry-picked already.

If the `git-changeset` utility is installed, commits will be grouped by their
originating GitHub pull requests. This may take a while on the first run since
the script needs to fetch the PR data from GitHub. The data is then cached
locally in the `.git/changeset/` directory and reused on next runs.
By default, commits will be grouped by their originating GitHub pull requests.
This may take a while on the first run since the script needs to fetch the PR
data from GitHub. The data is then cached locally in the `.git/changeset/`
directory and reused on next runs. This feature can be disabled with the `-s0`
switch.

For complete usage help, run:

Expand Down Expand Up @@ -209,8 +209,8 @@ If you use VIM, you can add [this](plan.vim) snippet into your `~/.vimrc` to
cycle through markers on the current line with the `CTRL+SPACE` key and do a
`git show` of the current commit with the `Enter` key.

If you also install the `git changeset` script mentioned above, you can type
`gx` to open the current commit's PR in your default browser.
If you install the [git-changeset](git-changeset) script into your `$PATH`, you
can type `gx` to open the current commit's PR in your default browser.

### Sharing a plan

Expand Down

0 comments on commit faeb74b

Please sign in to comment.