From 2a60d85544331fa1a1808b186a177d9731fd7fc4 Mon Sep 17 00:00:00 2001 From: shenxianpeng Date: Thu, 14 Nov 2024 20:32:54 +0200 Subject: [PATCH 1/5] feat: porting to python 3.9 --- gitstats | 6 ++++-- requirments.txt | 0 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 requirments.txt diff --git a/gitstats b/gitstats index c71b0e4..ab76215 100755 --- a/gitstats +++ b/gitstats @@ -1,6 +1,8 @@ #!/usr/bin/env python2 # Copyright (c) 2007-2014 Heikki Hokkanen & others (see doc/AUTHOR) # GPLv2 / GPLv3 +# Copyright (c) 2024-present Xianpeng Shen . +# MIT License import datetime import getopt import glob @@ -14,8 +16,8 @@ import sys import time import zlib -if sys.version_info < (2, 6): - print >> sys.stderr, "Python 2.6 or higher is required for gitstats" +if sys.version_info < (3, 9): + print >> sys.stderr, "Python 3.9 or higher is required for gitstats" sys.exit(1) from multiprocessing import Pool diff --git a/requirments.txt b/requirments.txt new file mode 100644 index 0000000..e69de29 From 9848368c32abdc78ffcd2d5e1972dfdbfc47fbd9 Mon Sep 17 00:00:00 2001 From: shenxianpeng Date: Thu, 14 Nov 2024 22:20:58 +0200 Subject: [PATCH 2/5] feat: porting to python 3.9 and code formatting --- .gitignore | 1 + .pre-commit-config.yaml | 21 + doc/AUTHOR | 1 - doc/TODO.txt | 1 - doc/gitstats.pod | 1 - gitstats | 2955 ++++++++++++++++++++++----------------- sortable.js | 38 +- 7 files changed, 1705 insertions(+), 1313 deletions(-) create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f7275bb --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +venv/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b3fa88f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,21 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-yaml + - id: check-toml + - id: end-of-file-fixer + - id: trailing-whitespace + - id: requirements-txt-fixer +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.13.0 + hooks: + - id: mypy +- repo: https://github.com/psf/black + rev: 24.10.0 + hooks: + - id: black +- repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell diff --git a/doc/AUTHOR b/doc/AUTHOR index e53db0d..096e0a1 100644 --- a/doc/AUTHOR +++ b/doc/AUTHOR @@ -6,4 +6,3 @@ See the following command for list of authors who have contributed: Also thanks to the following people: Alexander Botero-Lowry - diff --git a/doc/TODO.txt b/doc/TODO.txt index d1b86d6..7b4388e 100644 --- a/doc/TODO.txt +++ b/doc/TODO.txt @@ -74,4 +74,3 @@ - Commits (% of all) - Author top ten - Month statistics - diff --git a/doc/gitstats.pod b/doc/gitstats.pod index 469e910..0b2c0d3 100644 --- a/doc/gitstats.pod +++ b/doc/gitstats.pod @@ -112,4 +112,3 @@ http://gitstats.sourceforge.net/ =head1 SEE ALSO L - diff --git a/gitstats b/gitstats index ab76215..8b37936 100755 --- a/gitstats +++ b/gitstats @@ -17,16 +17,16 @@ import time import zlib if sys.version_info < (3, 9): - print >> sys.stderr, "Python 3.9 or higher is required for gitstats" - sys.exit(1) + print("Python 3.9 or higher is required for gitstats", file=sys.stderr) + sys.exit(1) from multiprocessing import Pool -os.environ['LC_ALL'] = 'C' +os.environ["LC_ALL"] = "C" -GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n' -ON_LINUX = (platform.system() == 'Linux') -WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun') +GNUPLOT_COMMON = "set terminal png transparent size 640,240\nset size 1.0,1.0\n" +ON_LINUX = platform.system() == "Linux" +WEEKDAYS = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun") exectime_internal = 0.0 exectime_external = 0.0 @@ -34,1149 +34,1490 @@ time_start = time.time() # By default, gnuplot is searched from path, but can be overridden with the # environment variable "GNUPLOT" -gnuplot_cmd = 'gnuplot' -if 'GNUPLOT' in os.environ: - gnuplot_cmd = os.environ['GNUPLOT'] +gnuplot_cmd = "gnuplot" +if "GNUPLOT" in os.environ: + gnuplot_cmd = os.environ["GNUPLOT"] conf = { - 'max_domains': 10, - 'max_ext_length': 10, - 'style': 'gitstats.css', - 'max_authors': 20, - 'authors_top': 5, - 'commit_begin': '', - 'commit_end': 'HEAD', - 'linear_linestats': 1, - 'project_name': '', - 'processes': 8, - 'start_date': '' + "max_domains": 10, + "max_ext_length": 10, + "style": "gitstats.css", + "max_authors": 20, + "authors_top": 5, + "commit_begin": "", + "commit_end": "HEAD", + "linear_linestats": 1, + "project_name": "", + "processes": 8, + "start_date": "", } -def getpipeoutput(cmds, quiet = False): - global exectime_external - start = time.time() - if not quiet and ON_LINUX and os.isatty(1): - print '>> ' + ' | '.join(cmds), - sys.stdout.flush() - p = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True) - processes=[p] - for x in cmds[1:]: - p = subprocess.Popen(x, stdin = p.stdout, stdout = subprocess.PIPE, shell = True) - processes.append(p) - output = p.communicate()[0] - for p in processes: - p.wait() - end = time.time() - if not quiet: - if ON_LINUX and os.isatty(1): - print '\r', - print '[%.5f] >> %s' % (end - start, ' | '.join(cmds)) - exectime_external += (end - start) - return output.rstrip('\n') - -def getlogrange(defaultrange = 'HEAD', end_only = True): - commit_range = getcommitrange(defaultrange, end_only) - if len(conf['start_date']) > 0: - return '--since="%s" "%s"' % (conf['start_date'], commit_range) - return commit_range - -def getcommitrange(defaultrange = 'HEAD', end_only = False): - if len(conf['commit_end']) > 0: - if end_only or len(conf['commit_begin']) == 0: - return conf['commit_end'] - return '%s..%s' % (conf['commit_begin'], conf['commit_end']) - return defaultrange + +def getpipeoutput(cmds, quiet=False): + global exectime_external + start = time.time() + if not quiet and ON_LINUX and os.isatty(1): + print(">> " + " | ".join(cmds), end=" ") + sys.stdout.flush() + p = subprocess.Popen(cmds[0], stdout=subprocess.PIPE, shell=True) + processes = [p] + for x in cmds[1:]: + p = subprocess.Popen(x, stdin=p.stdout, stdout=subprocess.PIPE, shell=True) + processes.append(p) + output = p.communicate()[0] + for p in processes: + p.wait() + end = time.time() + if not quiet: + if ON_LINUX and os.isatty(1): + print("\r", end=" ") + print("[%.5f] >> %s" % (end - start, " | ".join(cmds))) + exectime_external += end - start + return output.rstrip("\n") + + +def getlogrange(defaultrange="HEAD", end_only=True): + commit_range = getcommitrange(defaultrange, end_only) + if len(conf["start_date"]) > 0: + return '--since="%s" "%s"' % (conf["start_date"], commit_range) + return commit_range + + +def getcommitrange(defaultrange="HEAD", end_only=False): + if len(conf["commit_end"]) > 0: + if end_only or len(conf["commit_begin"]) == 0: + return conf["commit_end"] + return "%s..%s" % (conf["commit_begin"], conf["commit_end"]) + return defaultrange + def getkeyssortedbyvalues(dict): - return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items()))) + return [el[1] for el in sorted([(el[1], el[0]) for el in list(dict.items())])] + # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits') def getkeyssortedbyvaluekey(d, key): - return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys()))) + return [el[1] for el in sorted([(d[el][key], el) for el in list(d.keys())])] + def getstatsummarycounts(line): - numbers = re.findall('\d+', line) - if len(numbers) == 1: - # neither insertions nor deletions: may probably only happen for "0 files changed" - numbers.append(0); - numbers.append(0); - elif len(numbers) == 2 and line.find('(+)') != -1: - numbers.append(0); # only insertions were printed on line - elif len(numbers) == 2 and line.find('(-)') != -1: - numbers.insert(1, 0); # only deletions were printed on line - return numbers + numbers = re.findall(r"\d+", line) + if len(numbers) == 1: + # neither insertions nor deletions: may probably only happen for "0 files changed" + numbers.append(0) + numbers.append(0) + elif len(numbers) == 2 and line.find("(+)") != -1: + numbers.append(0) + # only insertions were printed on line + elif len(numbers) == 2 and line.find("(-)") != -1: + numbers.insert(1, 0) + # only deletions were printed on line + return numbers + VERSION = 0 + + def getversion(): - global VERSION - if VERSION == 0: - gitstats_repo = os.path.dirname(os.path.abspath(__file__)) - VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" % - (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])]) - return VERSION + global VERSION + if VERSION == 0: + gitstats_repo = os.path.dirname(os.path.abspath(__file__)) + VERSION = getpipeoutput( + [ + "git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" + % (gitstats_repo, gitstats_repo, getcommitrange("HEAD").split("\n")[0]) + ] + ) + return VERSION + def getgitversion(): - return getpipeoutput(['git --version']).split('\n')[0] + return getpipeoutput(["git --version"]).split("\n")[0] + def getgnuplotversion(): - return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0] + return getpipeoutput(["%s --version" % gnuplot_cmd]).split("\n")[0] + def getnumoffilesfromrev(time_rev): - """ - Get number of files changed in commit - """ - time, rev = time_rev - return (int(time), rev, int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])) + """ + Get number of files changed in commit + """ + time, rev = time_rev + return ( + int(time), + rev, + int( + getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, "wc -l"]).split( + "\n" + )[0] + ), + ) + def getnumoflinesinblob(ext_blob): - """ - Get number of lines in blob - """ - ext, blob_id = ext_blob - return (ext, blob_id, int(getpipeoutput(['git cat-file blob %s' % blob_id, 'wc -l']).split()[0])) + """ + Get number of lines in blob + """ + ext, blob_id = ext_blob + return ( + ext, + blob_id, + int(getpipeoutput(["git cat-file blob %s" % blob_id, "wc -l"]).split()[0]), + ) + class DataCollector: - """Manages data collection from a revision control repository.""" - def __init__(self): - self.stamp_created = time.time() - self.cache = {} - self.total_authors = 0 - self.activity_by_hour_of_day = {} # hour -> commits - self.activity_by_day_of_week = {} # day -> commits - self.activity_by_month_of_year = {} # month [1-12] -> commits - self.activity_by_hour_of_week = {} # weekday -> hour -> commits - self.activity_by_hour_of_day_busiest = 0 - self.activity_by_hour_of_week_busiest = 0 - self.activity_by_year_week = {} # yy_wNN -> commits - self.activity_by_year_week_peak = 0 - - self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed} - - self.total_commits = 0 - self.total_files = 0 - self.authors_by_commits = 0 - - # domains - self.domains = {} # domain -> commits - - # author of the month - self.author_of_month = {} # month -> author -> commits - self.author_of_year = {} # year -> author -> commits - self.commits_by_month = {} # month -> commits - self.commits_by_year = {} # year -> commits - self.lines_added_by_month = {} # month -> lines added - self.lines_added_by_year = {} # year -> lines added - self.lines_removed_by_month = {} # month -> lines removed - self.lines_removed_by_year = {} # year -> lines removed - self.first_commit_stamp = 0 - self.last_commit_stamp = 0 - self.last_active_day = None - self.active_days = set() - - # lines - self.total_lines = 0 - self.total_lines_added = 0 - self.total_lines_removed = 0 - - # size - self.total_size = 0 - - # timezone - self.commits_by_timezone = {} # timezone -> commits - - # tags - self.tags = {} - - self.files_by_stamp = {} # stamp -> files - - # extensions - self.extensions = {} # extension -> files, lines - - # line statistics - self.changes_by_date = {} # stamp -> { files, ins, del } - - ## - # This should be the main function to extract data from the repository. - def collect(self, dir): - self.dir = dir - if len(conf['project_name']) == 0: - self.projectname = os.path.basename(os.path.abspath(dir)) - else: - self.projectname = conf['project_name'] - - ## - # Load cacheable data - def loadCache(self, cachefile): - if not os.path.exists(cachefile): - return - print 'Loading cache...' - f = open(cachefile, 'rb') - try: - self.cache = pickle.loads(zlib.decompress(f.read())) - except: - # temporary hack to upgrade non-compressed caches - f.seek(0) - self.cache = pickle.load(f) - f.close() - - ## - # Produce any additional statistics from the extracted data. - def refine(self): - pass - - ## - # : get a dictionary of author - def getAuthorInfo(self, author): - return None - - def getActivityByDayOfWeek(self): - return {} - - def getActivityByHourOfDay(self): - return {} - - # : get a dictionary of domains - def getDomainInfo(self, domain): - return None - - ## - # Get a list of authors - def getAuthors(self): - return [] - - def getFirstCommitDate(self): - return datetime.datetime.now() - - def getLastCommitDate(self): - return datetime.datetime.now() - - def getStampCreated(self): - return self.stamp_created - - def getTags(self): - return [] - - def getTotalAuthors(self): - return -1 - - def getTotalCommits(self): - return -1 - - def getTotalFiles(self): - return -1 - - def getTotalLOC(self): - return -1 - - ## - # Save cacheable data - def saveCache(self, cachefile): - print 'Saving cache...' - tempfile = cachefile + '.tmp' - f = open(tempfile, 'wb') - #pickle.dump(self.cache, f) - data = zlib.compress(pickle.dumps(self.cache)) - f.write(data) - f.close() - try: - os.remove(cachefile) - except OSError: - pass - os.rename(tempfile, cachefile) + """Manages data collection from a revision control repository.""" + + def __init__(self): + self.stamp_created = time.time() + self.cache = {} + self.total_authors = 0 + self.activity_by_hour_of_day = {} # hour -> commits + self.activity_by_day_of_week = {} # day -> commits + self.activity_by_month_of_year = {} # month [1-12] -> commits + self.activity_by_hour_of_week = {} # weekday -> hour -> commits + self.activity_by_hour_of_day_busiest = 0 + self.activity_by_hour_of_week_busiest = 0 + self.activity_by_year_week = {} # yy_wNN -> commits + self.activity_by_year_week_peak = 0 + + self.authors = ( + {} + ) # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed} + + self.total_commits = 0 + self.total_files = 0 + self.authors_by_commits = 0 + + # domains + self.domains = {} # domain -> commits + + # author of the month + self.author_of_month = {} # month -> author -> commits + self.author_of_year = {} # year -> author -> commits + self.commits_by_month = {} # month -> commits + self.commits_by_year = {} # year -> commits + self.lines_added_by_month = {} # month -> lines added + self.lines_added_by_year = {} # year -> lines added + self.lines_removed_by_month = {} # month -> lines removed + self.lines_removed_by_year = {} # year -> lines removed + self.first_commit_stamp = 0 + self.last_commit_stamp = 0 + self.last_active_day = None + self.active_days = set() + + # lines + self.total_lines = 0 + self.total_lines_added = 0 + self.total_lines_removed = 0 + + # size + self.total_size = 0 + + # timezone + self.commits_by_timezone = {} # timezone -> commits + + # tags + self.tags = {} + + self.files_by_stamp = {} # stamp -> files + + # extensions + self.extensions = {} # extension -> files, lines + + # line statistics + self.changes_by_date = {} # stamp -> { files, ins, del } + + ## + # This should be the main function to extract data from the repository. + def collect(self, dir): + self.dir = dir + if len(conf["project_name"]) == 0: + self.projectname = os.path.basename(os.path.abspath(dir)) + else: + self.projectname = conf["project_name"] + + ## + # Load cacheable data + def loadCache(self, cachefile): + if not os.path.exists(cachefile): + return + print("Loading cache...") + f = open(cachefile, "rb") + try: + self.cache = pickle.loads(zlib.decompress(f.read())) + except: + # temporary hack to upgrade non-compressed caches + f.seek(0) + self.cache = pickle.load(f) + f.close() + + ## + # Produce any additional statistics from the extracted data. + def refine(self): + pass + + ## + # : get a dictionary of author + def getAuthorInfo(self, author): + return None + + def getActivityByDayOfWeek(self): + return {} + + def getActivityByHourOfDay(self): + return {} + + # : get a dictionary of domains + def getDomainInfo(self, domain): + return None + + ## + # Get a list of authors + def getAuthors(self): + return [] + + def getFirstCommitDate(self): + return datetime.datetime.now() + + def getLastCommitDate(self): + return datetime.datetime.now() + + def getStampCreated(self): + return self.stamp_created + + def getTags(self): + return [] + + def getTotalAuthors(self): + return -1 + + def getTotalCommits(self): + return -1 + + def getTotalFiles(self): + return -1 + + def getTotalLOC(self): + return -1 + + ## + # Save cacheable data + def saveCache(self, cachefile): + print("Saving cache...") + tempfile = cachefile + ".tmp" + f = open(tempfile, "wb") + # pickle.dump(self.cache, f) + data = zlib.compress(pickle.dumps(self.cache)) + f.write(data) + f.close() + try: + os.remove(cachefile) + except OSError: + pass + os.rename(tempfile, cachefile) + class GitDataCollector(DataCollector): - def collect(self, dir): - DataCollector.collect(self, dir) - - self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l'])) - #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l')) - - # tags - lines = getpipeoutput(['git show-ref --tags']).split('\n') - for line in lines: - if len(line) == 0: - continue - (hash, tag) = line.split(' ') - - tag = tag.replace('refs/tags/', '') - output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash]) - if len(output) > 0: - parts = output.split(' ') - stamp = 0 - try: - stamp = int(parts[0]) - except ValueError: - stamp = 0 - self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} } - - # collect info on tags, starting from latest - tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items())))) - prev = None - for tag in reversed(tags_sorted_by_date_desc): - cmd = 'git shortlog -s "%s"' % tag - if prev != None: - cmd += ' "^%s"' % prev - output = getpipeoutput([cmd]) - if len(output) == 0: - continue - prev = tag - for line in output.split('\n'): - parts = re.split('\s+', line, 2) - commits = int(parts[1]) - author = parts[2] - self.tags[tag]['commits'] += commits - self.tags[tag]['authors'][author] = commits - - # Collect revision statistics - # Outputs "