Skip to content

Commit

Permalink
Added support for blacklists for authors and paths
Browse files (browse the repository at this point in the history)
  • Loading branch information
Monnoroch committed Mar 13, 2015
1 parent fe94d55 commit 7af9614
Showing 1 changed file with 65 additions and 42 deletions.
107 changes: 65 additions & 42 deletions gitstats
Original file line number | Diff line number | Diff line change
Expand Up @@ -47,7 +47,9 @@ conf = {
'linear_linestats': 1,
'project_name': '',
'processes': 8,
'start_date': ''
'start_date': '',
'excluded_authors': [],
'excluded_prefixes': []
}

def getpipeoutput(cmds, quiet = False):
Expand Down Expand Up @@ -322,6 +324,8 @@ class GitDataCollector(DataCollector):
parts = re.split('\s+', line, 2)
commits = int(parts[1])
author = parts[2]
if author in conf["excluded_authors"]:
continue
self.tags[tag]['commits'] += commits
self.tags[tag]['authors'][author] = commits

Expand All @@ -338,6 +342,8 @@ class GitDataCollector(DataCollector):
timezone = parts[3]
author, mail = parts[4].split('<', 1)
author = author.rstrip()
if author in conf["excluded_authors"]:
continue
mail = mail.rstrip('>')
domain = '?'
if mail.find('@') != -1:
Expand Down Expand Up @@ -434,14 +440,16 @@ class GitDataCollector(DataCollector):
self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

# outputs "<stamp> <files>" for each revision
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T %%an" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
lines = []
revs_to_read = []
time_rev_count = []
#Look up rev in cache and take info from cache if found
#If not append rev to list of rev to read from repo
for revline in revlines:
time, rev = revline.split(' ')
time, rev, author = revline.split(' ')
if author in conf["excluded_authors"]:
continue
#if cache empty then add time and rev to list of new rev's
#otherwise try to read needed info from cache
if 'files_in_tree' not in self.cache.keys():
Expand Down Expand Up @@ -489,6 +497,14 @@ class GitDataCollector(DataCollector):
blob_id = parts[2]
size = int(parts[3])
fullpath = parts[4]
exclude = False
for path in conf["excluded_prefixes"]:
if fullpath.startswith(path):
exclude = True
break

if exclude:
continue

self.total_size += size
self.total_files += 1
Expand Down Expand Up @@ -540,6 +556,7 @@ class GitDataCollector(DataCollector):
lines.reverse()
files = 0; inserted = 0; deleted = 0; total_lines = 0
author = None
last_line = ""
for line in lines:
if len(line) == 0:
continue
Expand All @@ -550,35 +567,36 @@ class GitDataCollector(DataCollector):
if pos != -1:
try:
(stamp, author) = (int(line[:pos]), line[pos+1:])
self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

date = datetime.datetime.fromtimestamp(stamp)
yymm = date.strftime('%Y-%m')
self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

yy = date.year
self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

files, inserted, deleted = 0, 0, 0
if author not in conf["excluded_authors"]:
self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

date = datetime.datetime.fromtimestamp(stamp)
yymm = date.strftime('%Y-%m')
self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

yy = date.year
self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

files, inserted, deleted = 0, 0, 0

numbers = getstatsummarycounts(last_line)
if len(numbers) == 3:
(files, inserted, deleted) = map(lambda el : int(el), numbers)
total_lines += inserted
total_lines -= deleted
self.total_lines_added += inserted
self.total_lines_removed += deleted
else:
print 'Warning: failed to handle line "%s"' % line
(files, inserted, deleted) = (0, 0, 0)
except ValueError:
print 'Warning: unexpected line "%s"' % line
else:
print 'Warning: unexpected line "%s"' % line
else:
numbers = getstatsummarycounts(line)

if len(numbers) == 3:
(files, inserted, deleted) = map(lambda el : int(el), numbers)
total_lines += inserted
total_lines -= deleted
self.total_lines_added += inserted
self.total_lines_removed += deleted

else:
print 'Warning: failed to handle line "%s"' % line
(files, inserted, deleted) = (0, 0, 0)
last_line = line
#self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
self.total_lines += total_lines

Expand Down Expand Up @@ -606,21 +624,22 @@ class GitDataCollector(DataCollector):
try:
oldstamp = stamp
(stamp, author) = (int(line[:pos]), line[pos+1:])
if oldstamp > stamp:
# clock skew, keep old timestamp to avoid having ugly graph
stamp = oldstamp
if author not in self.authors:
self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
if stamp not in self.changes_by_date_by_author:
self.changes_by_date_by_author[stamp] = {}
if author not in self.changes_by_date_by_author[stamp]:
self.changes_by_date_by_author[stamp][author] = {}
self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
files, inserted, deleted = 0, 0, 0
if author not in conf["excluded_authors"]:
if oldstamp > stamp:
# clock skew, keep old timestamp to avoid having ugly graph
stamp = oldstamp
if author not in self.authors:
self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
if stamp not in self.changes_by_date_by_author:
self.changes_by_date_by_author[stamp] = {}
if author not in self.changes_by_date_by_author[stamp]:
self.changes_by_date_by_author[stamp][author] = {}
self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
files, inserted, deleted = 0, 0, 0
except ValueError:
print 'Warning: unexpected line "%s"' % line
else:
Expand All @@ -644,6 +663,8 @@ class GitDataCollector(DataCollector):

for name in self.authors.keys():
a = self.authors[name]
#if a is None:
# continue
a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
Expand Down Expand Up @@ -1489,3 +1510,5 @@ if __name__=='__main__':
g = GitStats()
g.run(sys.argv[1:])


mon@open-freelancer:~/dev/go/src/openfreelancers$

0 comments on commit 7af9614

Please sign in to comment.