#!/usr/bin/env python3
# gitbackhub (part of ossobv/vcutil) // wdoekes/2015-2017 // Public Domain
#
# GitHub is cool. But since we aren't paying for it, we can't rely on
# its availability. This script ensures that we get a daily backup of
# all the repositories we host on github.
#
# Yes, I know that the distributed nature of git means that I always
# have a working copy of my repository somewhere. But this script
# ensures that they're all in a single safe location.
#
# Usage:
#
#     gitbackhub ORGANISATION PATH_TO_REPOS VERBOSITY
#
#     # where:
#     # ORGANISATION is your organisation, like 'ossobv'
#     # PATH_TO_REPOS is a path, like '/srv/backups/github/repos'
#     # VERBOSITY is '-v0' or '-v3', where '-v0' is silent unless you
#     # need to take action
#     #
#     # put it in your daily crontab or something; see the example below
#
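# For example (hypothetical install and backup paths), a daily
# /etc/cron.d entry could look like:
#
#     25 4 * * * root /usr/local/bin/gitbackhub ossobv /srv/backups/github/repos -v0
#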
# Notes:
#
#     By default, the script denies destructive updates like
#     force-updates and even branch deletes, unless the name contains
#     any of "wip", "feat", "fix" or "dependabot", as in
#     "development-wip". That way you can have certain branches that
#     force-pushes are allowed on, like feature branches that need to
#     be rebased every now and then.
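#
#     For example, a forced update to a (hypothetical) branch
#     "feature/foo-wip" is merely counted, while a forced update to or
#     delete of "master" aborts the fetch so you can inspect the
#     repository by hand.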
#
import os
from json import loads
from re import compile
from subprocess import STDOUT, check_call, check_output
from urllib.error import HTTPError
from urllib.request import urlopen

SAFE_REPO_NAME_RE = compile(
    r'^[A-Za-z0-9_][A-Za-z0-9_.-]*$')
WIP_WORDS = ('wip', 'feat', 'fix', 'dependabot')


class DestructiveChanges(ValueError):
    pass


class ChangeSet(object):
    def __init__(self):
        self.clones = 0         # new repo
        self.news = 0           # new branch/tag/...
        self.updates = 0        # update branch
        # Destructive:
        self.deletes = 0        # deleted branch/tag/...
        self.changes = 0        # forced update
        # Destructive, but we don't care (WIP=work in progress):
        self.deletes_wip = 0    # deleted branch/tag/... with "-wip"
        self.changes_wip = 0    # forced update with "-wip"
        self.log = []

    def is_destructive(self):
        # We ignore the "work in progress" changes here.
        return bool(self.deletes + self.changes)

    def __str__(self):
        if not self:
            return 'no changes'
        ret = []
        if self.clones:
            ret.append('%d new cloned repositories' % (self.clones,))
        if self.news:
            ret.append('%d new items' % (self.news,))
        if self.updates:
            ret.append('%d updated items' % (self.updates,))
        if self.deletes:
            ret.append('%d DELETED items' % (self.deletes,))
        if self.changes:
            ret.append('%d CHANGED items' % (self.changes,))
        if self.changes_wip or self.deletes_wip:
            ret.append('%d/%d "work in progress" deletes/changes' % (
                self.deletes_wip, self.changes_wip))
        ret = ', '.join(ret)
        lines = [ret] + self.log
        return '\n'.join(lines)

    def __bool__(self):  # __nonzero__ in python2
        return bool(self.clones + self.news + self.updates + self.deletes +
                    self.changes + self.deletes_wip + self.changes_wip)
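
# Illustrative use (hypothetical values): str(ChangeSet()) returns
# 'no changes'; after cs.news = 2 and cs.updates = 1, str(cs) yields
# '2 new items, 1 updated items'.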


class Repository(object):
    def __init__(self, github, github_json):
        self.github = github
        self.name = github_json['name']  # or 'full_name'
        self.clone_url = github_json['clone_url']
        # Check that we're not about to allow the remote end to play nasty.
        if not SAFE_REPO_NAME_RE.match(self.name):
            raise ValueError('Suspicious repository name %s!' % (self.name,))
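        # E.g. a (hypothetical) name like '../evil' or '.hidden' fails
        # the regex above and is rejected before we ever build a
        # filesystem path or git command line from it.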

    def __eq__(self, other):
        return (
            self.name == other.name and
            self.clone_url == other.clone_url and
            self.github == other.github)

    def __lt__(self, other):
        # Order by name first, then clone_url; repositories that tie on
        # both must come from the same GitHub instance.
        if self.name != other.name:
            return self.name < other.name
        if self.clone_url != other.clone_url:
            return self.clone_url < other.clone_url
        assert self.github == other.github, (self.github, other.github)
        return False

    def clone_or_update(self, allow_damaging_fetch=False):
        if self.exists():
            if not allow_damaging_fetch:
                damaging = self.has_damaging_updates()
                if damaging:
                    raise DestructiveChanges(damaging)
            changeset = self.update()
        else:
            changeset = self.clone()
        return changeset

    def exists(self):
        path = os.path.join(self.github.get_backup_destination(), self.name,
                            '.git')
        return os.path.isdir(path)

    def clone(self):
        os.chdir(self.github.get_backup_destination())
        check_call(['git', 'clone', '--quiet', self.clone_url, self.name])
        # Config against accidental or malicious overwrites. Note that
        # the receive.* settings only guard pushes *into* this clone
        # (receive-pack), not our own fetches; fetches are checked
        # separately through the dry-run in has_damaging_updates().
        path = os.path.join(self.github.get_backup_destination(), self.name)
        os.chdir(path)
        check_call(['git', 'config', '--local', 'receive.denyDeletes', 'true'])
        check_call(['git', 'config', '--local', 'receive.denyNonFastForwards',
                    'true'])
        # Return something sane.
        changeset = ChangeSet()
        changeset.clones = 1
        return changeset

    def has_damaging_updates(self):
        path = os.path.join(self.github.get_backup_destination(), self.name)
        os.chdir(path)
        ret = check_output(['git', 'fetch', '--all', '--prune', '--dry-run'],
                           stderr=STDOUT)
        changeset = self.collect_changes_from_output(ret)
        if changeset.is_destructive():
            assert bool(changeset)
            return changeset
        return False

    def update(self):
        path = os.path.join(self.github.get_backup_destination(), self.name)
        os.chdir(path)
        ret = check_output(['git', 'fetch', '--all', '--prune'],
                           stderr=STDOUT)
        return self.collect_changes_from_output(ret)

    def collect_changes_from_output(self, output):
        changeset = ChangeSet()
        output = str(output, 'utf-8').strip()
        for lineno, line in enumerate(output.split('\n')):
            #  - [deleted]  (none) -> origin/dependabot/npm...
            line_after_arrow = ''
            if '-> ' in line:
                line_after_arrow = line.split('-> ', 1)[1]

            # Fetching origin
            # remote: Counting objects: 3, done.
            # remote: Compressing objects: 100% (3/3), done.
            # remote: Total 3 (delta 2), reused 1 (delta 0)
            # Unpacking objects: 100% (3/3), done.
            # From https://github.com/os...
            if (line.startswith('Fetching origin') or
                    line.startswith('remote: ') or
                    line.startswith('Unpacking objects: ') or
                    line.startswith('From ')):
                pass
            # New stuff:
            #  * [new tag]  test -> test
            elif line.startswith(' * '):
                changeset.news += 1
            # Updated stuff:
            #    1d3a96e..82330e0  test-fetch -> test-fetch
            elif line.startswith('   '):
                changeset.updates += 1
            # Deleted stuff:
            #  - [deleted]  (none) -> origin/test-fetch
            #  x [deleted]  (none) -> origin/test-fetch  [OLD]
            elif line.startswith((' - ', ' x ')):
                if any(wip_word in line_after_arrow for wip_word in WIP_WORDS):
                    changeset.deletes_wip += 1
                else:
                    changeset.deletes += 1
            # Changed stuff:
            #  + af324cd...1d3a96e  test-fetch -> test-fetch  (forced update)
            elif line.startswith(' + '):
                if any(wip_word in line_after_arrow for wip_word in WIP_WORDS):
                    changeset.changes_wip += 1
                else:
                    changeset.changes += 1
            # else: unexpected git output.
            else:
                raise ValueError('Unexpected git output on line %d in:\n%s' %
                                 (lineno + 1, output))

            if line.startswith(' '):
                changeset.log.append(line)
        return changeset
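
    # Illustrative example (made-up hashes): a fetch that prints
    #  + af324cd...1d3a96e  topic-wip -> topic-wip  (forced update)
    # yields changes_wip == 1 and is_destructive() stays False, while
    # the same forced update on "master" would bump changes instead
    # and make the backup refuse to fetch.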

    def __str__(self):
        return '{0.github}/{0.name}'.format(self)

    def __repr__(self):
        return '<Repository({0.name}@{0.github})>'.format(self)


class GitHub(object):
    def __init__(self, org, dstpath):
        self.organisation = org
        self.backup_destination = os.path.abspath(dstpath)

    def get_backup_destination(self):
        return self.backup_destination

    def _get_next_url(self, link):
        # Link:
        #   <https://api.github.com/organizations/:number/repos?
        #    type=sources&page=2>; rel="next",
        #   <https://api.github.com/organizations/:number/repos?
        #    type=sources&page=5>; rel="last"
        if link is None:
            return None
        links = [i.strip() for i in link.split(',')]
        next_ = [i for i in links if i.endswith('; rel="next"')]
        if not next_:
            return None
        next_ = next_[0]
        if not next_ or next_[0] != '<' or '>' not in next_:
            assert False, link
        return next_[1:].split('>', 1)[0]
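
    # Illustrative call (hypothetical URLs): for a header value of
    #   '<https://api.github.com/x?page=2>; rel="next", '
    #   '<https://api.github.com/x?page=5>; rel="last"'
    # this returns 'https://api.github.com/x?page=2'; a header without
    # a rel="next" part returns None.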

    def _get_repositories(self):
        # With 'type=sources' we get only non-forked (i.e. our own)
        # projects.
        # NOTE: For some projects we *do* depend on our own forks
        # though... could be something to keep in mind.
        url = ('https://api.github.com/orgs/{0.organisation}/repos?'
               'type=sources').format(self)
        try:
            fp = urlopen(url)
            # fp = open('gitbackhub.tmp', 'rb')
        except HTTPError:
            url = ('https://api.github.com/users/{0.organisation}/repos?'
                   'type=sources').format(self)
            fp = urlopen(url)

        repositories = []
        pages = 0
        while True:
            # The next_url may contain a page with the next resultset.
            next_url = self._get_next_url(fp.headers.get('Link'))
            blob = fp.read()
            uniblob = blob.decode('utf-8')
            json = loads(uniblob)
            repositories.extend([Repository(self, i) for i in json])
            if not next_url:
                break
            pages += 1
            if pages >= 20:
                assert False, 'more than 20 pages? {!r}'.format(next_url)
            fp = urlopen(next_url)
        return repositories

    def get_repositories(self):
        try:
            repositories = self._get_repositories()
        except HTTPError as err:
            if err.fp:
                try:
                    data = err.fp.read().decode('ascii', 'replace')
                except Exception:
                    data = ''
                # TODO: Fix this:
                print('URL', err.url)
                print('CODE', err.code)
                print('MSG', err.msg)
                print('HDRS', err.headers)
                print('BODY', data)
            raise err
        repositories.sort()
        return repositories

    def __str__(self):
        return 'GitHub/orgs/{0.organisation}'.format(self)


if __name__ == '__main__':
    import sys

    # Tolerate missing arguments by falling back to the defaults.
    organisation = sys.argv[1] if len(sys.argv) > 1 else 'ossobv'
    backup_destination = (
        sys.argv[2] if len(sys.argv) > 2 else '/home/osso/GITHUB/repos')
    quiet = (len(sys.argv) > 3 and sys.argv[3] == '-v0')

    errors = 0
    g = GitHub(org=organisation, dstpath=backup_destination)
    for repository in g.get_repositories():
        try:
            result = repository.clone_or_update()
        except DestructiveChanges as e:
            result = e.args[0]
            print('Updating %s failed!\nDestructive changes ahead, '
                  'please check and resolve manually!\n%s\n' %
                  (repository, result))
            print('    cd PATH; git fetch --all --prune --dry-run\n')
            errors += 1
        except Exception:
            print('Got error on repository:', repository)
            raise
        else:
            if result:
                if result.is_destructive():
                    print('Updated %s with the following DESTRUCTIVE '
                          'changes:\n%s\n' % (repository, result))
                elif not quiet:
                    print('Updated %s with the following '
                          'changes:\n%s\n' % (repository, result))

    if errors:
        sys.exit(1)
    sys.exit(0)
# vim: set ts=8 sw=4 sts=4 et ai: