From 7af9614d575a54b209c501f24423516c9341954c Mon Sep 17 00:00:00 2001 From: Max Date: Fri, 13 Mar 2015 18:31:41 +0300 Subject: [PATCH] added support for blacklists for authors and paths --- gitstats | 107 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 42 deletions(-) diff --git a/gitstats b/gitstats index 48cbf1e..86d5d51 100755 --- a/gitstats +++ b/gitstats @@ -47,7 +47,9 @@ conf = { 'linear_linestats': 1, 'project_name': '', 'processes': 8, - 'start_date': '' + 'start_date': '', + 'excluded_authors': [], + 'excluded_prefixes': [] } def getpipeoutput(cmds, quiet = False): @@ -322,6 +324,8 @@ class GitDataCollector(DataCollector): parts = re.split('\s+', line, 2) commits = int(parts[1]) author = parts[2] + if author in conf["excluded_authors"]: + continue self.tags[tag]['commits'] += commits self.tags[tag]['authors'][author] = commits @@ -338,6 +342,8 @@ class GitDataCollector(DataCollector): timezone = parts[3] author, mail = parts[4].split('<', 1) author = author.rstrip() + if author in conf["excluded_authors"]: + continue mail = mail.rstrip('>') domain = '?' 
if mail.find('@') != -1: @@ -434,14 +440,16 @@ class GitDataCollector(DataCollector): self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1 # outputs " " for each revision - revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n') + revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T %%an" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n') lines = [] revs_to_read = [] time_rev_count = [] #Look up rev in cache and take info from cache if found #If not append rev to list of rev to read from repo for revline in revlines: - time, rev = revline.split(' ') + time, rev, author = revline.split(' ') + if author in conf["excluded_authors"]: + continue #if cache empty then add time and rev to list of new rev's #otherwise try to read needed info from cache if 'files_in_tree' not in self.cache.keys(): @@ -489,6 +497,14 @@ class GitDataCollector(DataCollector): blob_id = parts[2] size = int(parts[3]) fullpath = parts[4] + exclude = False + for path in conf["excluded_prefixes"]: + if fullpath.startswith(path): + exclude = True + break + + if exclude: + continue self.total_size += size self.total_files += 1 @@ -540,6 +556,7 @@ class GitDataCollector(DataCollector): lines.reverse() files = 0; inserted = 0; deleted = 0; total_lines = 0 author = None + last_line = "" for line in lines: if len(line) == 0: continue @@ -550,35 +567,36 @@ class GitDataCollector(DataCollector): if pos != -1: try: (stamp, author) = (int(line[:pos]), line[pos+1:]) - self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines } - - date = datetime.datetime.fromtimestamp(stamp) - yymm = date.strftime('%Y-%m') - self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted - self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted - - yy = date.year - self.lines_added_by_year[yy] = 
self.lines_added_by_year.get(yy,0) + inserted - self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted - - files, inserted, deleted = 0, 0, 0 + if author not in conf["excluded_authors"]: + self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines } + + date = datetime.datetime.fromtimestamp(stamp) + yymm = date.strftime('%Y-%m') + self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted + self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted + + yy = date.year + self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted + self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted + + files, inserted, deleted = 0, 0, 0 + + numbers = getstatsummarycounts(last_line) + if len(numbers) == 3: + (files, inserted, deleted) = map(lambda el : int(el), numbers) + total_lines += inserted + total_lines -= deleted + self.total_lines_added += inserted + self.total_lines_removed += deleted + else: + print 'Warning: failed to handle line "%s"' % line + (files, inserted, deleted) = (0, 0, 0) except ValueError: print 'Warning: unexpected line "%s"' % line else: print 'Warning: unexpected line "%s"' % line else: - numbers = getstatsummarycounts(line) - - if len(numbers) == 3: - (files, inserted, deleted) = map(lambda el : int(el), numbers) - total_lines += inserted - total_lines -= deleted - self.total_lines_added += inserted - self.total_lines_removed += deleted - - else: - print 'Warning: failed to handle line "%s"' % line - (files, inserted, deleted) = (0, 0, 0) + last_line = line #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted } self.total_lines += total_lines @@ -606,21 +624,22 @@ class GitDataCollector(DataCollector): try: oldstamp = stamp (stamp, author) = (int(line[:pos]), line[pos+1:]) - if oldstamp > stamp: - # clock skew, keep old timestamp to avoid having ugly graph - stamp 
= oldstamp - if author not in self.authors: - self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0} - self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1 - self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted - self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted - if stamp not in self.changes_by_date_by_author: - self.changes_by_date_by_author[stamp] = {} - if author not in self.changes_by_date_by_author[stamp]: - self.changes_by_date_by_author[stamp][author] = {} - self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added'] - self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits'] - files, inserted, deleted = 0, 0, 0 + if author not in conf["excluded_authors"]: + if oldstamp > stamp: + # clock skew, keep old timestamp to avoid having ugly graph + stamp = oldstamp + if author not in self.authors: + self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0} + self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1 + self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted + self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted + if stamp not in self.changes_by_date_by_author: + self.changes_by_date_by_author[stamp] = {} + if author not in self.changes_by_date_by_author[stamp]: + self.changes_by_date_by_author[stamp][author] = {} + self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added'] + self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits'] + files, inserted, deleted = 0, 0, 0 except ValueError: print 'Warning: unexpected line "%s"' % line else: @@ -644,6 +663,8 @@ class GitDataCollector(DataCollector): for name in self.authors.keys(): a = self.authors[name] + #if a is 
None: + # continue a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits() date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']) date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']) @@ -1489,3 +1510,5 @@ if __name__=='__main__': g = GitStats() g.run(sys.argv[1:]) + +