-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revision cleanup: improve robustness, logging
Changes in this commit:
- Add a verbose mode for troubleshooting.
- Improve default logging (error statistics etc.).
- Add more input validation for removing revisions and for starting the job.
- Introduce a job-level lock for revision cleanups, to prevent errors caused by multiple jobs running simultaneously.
- Loading branch information
Showing
2 changed files
with
118 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,44 +1,91 @@ | ||
#!/usr/bin/env python | ||
"""This script cleans up data object revisions, by invoking the revision cleanup rules.""" | ||
|
||
import argparse | ||
import atexit | ||
import json | ||
import os | ||
import subprocess | ||
import sys | ||
|
||
NAME = os.path.basename(sys.argv[0]) | ||
LOCKFILE_PATH = '/tmp/irods-{}.lock'.format(NAME) | ||
|
||
if len(sys.argv) != 3: | ||
print('Usage: {} endOfCalendarDay bucketcase'.format(sys.argv[0])) | ||
exit(1) | ||
|
||
endOfCalendarDay = sys.argv[1] | ||
bucketcase = sys.argv[2] | ||
def get_args():
    """Parse and validate the command line arguments of the cleanup job.

    Positional arguments are ``endofcalendarday`` (kept as the string given on
    the command line) and ``bucketcase`` (one of "A", "B" or "Simple").
    ``--batch-size`` must be an integer of at least 1 and ``-v``/``--verbose``
    is a boolean switch.

    Returns:
        argparse.Namespace with the attributes ``endofcalendarday``,
        ``bucketcase``, ``batch_size`` and ``verbose``.

    Raises:
        SystemExit: raised by argparse when arguments are missing or invalid.
    """
    def positive_int(text):
        # The job consumes revisions in slices of batch_size; a size below 1
        # never shrinks the list, so reject it here instead of looping forever.
        try:
            number = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid int value: {!r}".format(text))
        if number < 1:
            raise argparse.ArgumentTypeError("must be at least 1, got {}".format(number))
        return number

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("endofcalendarday", help="End of calendar day (epoch time)")
    parser.add_argument("bucketcase", choices=["A", "B", "Simple"], help="Bucket case configuration name")
    parser.add_argument("--batch-size", type=positive_int, default=1,
                        help="Number of revisions to process at a time (default: 1).")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Make the revision cleanup rules print additional information for troubleshooting purposes.")
    return parser.parse_args()
|
||
|
||
def clean_up(revisions): | ||
def lock_or_die():
    """Prevent running multiple instances of this job simultaneously.

    Atomically creates the lock file at LOCKFILE_PATH and writes the current
    process id into it. If the lock file already exists, a message is printed
    and the process exits with status 1. The lock file is removed again when
    the interpreter exits.

    Raises:
        OSError: if the lock file could not be created for any reason other
            than it already existing, or if writing the pid fails.
    """
    # O_CREAT | O_EXCL makes creation fail if the file already exists, so the
    # existence check and the creation happen in a single step.
    try:
        fd = os.open(LOCKFILE_PATH, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
    except OSError:
        if os.path.exists(LOCKFILE_PATH):
            print('Not starting job: Lock file {} exists'.format(LOCKFILE_PATH))
            sys.exit(1)
        raise

    # Register the cleanup as soon as we own the lock file, so that a failure
    # while writing the pid cannot leave a stale lock behind.
    atexit.register(lambda: os.unlink(LOCKFILE_PATH))

    try:
        os.write(fd, str(os.getpid()).encode("utf-8"))
    finally:
        # Always release the descriptor, even when the write fails.
        os.close(fd)
|
||
|
||
def clean_up(revisions, bucketcase, endofcalendarday, verbose_flag):
    """Run rule_revisions_clean_up through ``irule`` for the given revisions.

    The revisions are serialised to JSON and embedded in a single-quoted
    literal inside the rule text. Backslashes are escaped before single
    quotes, so the backslashes added for the quotes are not doubled again.

    Returns:
        The output captured from the ``irule`` command.

    Raises:
        subprocess.CalledProcessError: if ``irule`` exits with a non-zero status.
    """
    payload = json.dumps(revisions)
    payload = payload.replace("\\", "\\\\")
    payload = payload.replace("'", "\\'")

    rule_text = "*out=''; rule_revisions_clean_up('{}', '{}', '{}', '{}', *out); writeString('stdout', *out);".format(payload, bucketcase, endofcalendarday, verbose_flag)
    command = [
        'irule',
        '-r',
        'irods_rule_engine_plugin-irods_rule_language-instance',
        rule_text,
        'null',
        'ruleExecOut',
    ]
    return subprocess.check_output(command)
|
||
|
||
print('START cleaning up revision store') | ||
def get_revisions_info():
    """Run rule_revisions_info through ``irule`` and decode its output.

    Returns:
        The JSON-decoded standard output of the rule.

    Raises:
        subprocess.CalledProcessError: if ``irule`` exits with a non-zero status.
        ValueError: if the rule output is not valid JSON.
    """
    command = [
        'irule',
        '-r',
        'irods_rule_engine_plugin-irods_rule_language-instance',
        '*out=""; rule_revisions_info(*out); writeString("stdout", *out);',
        'null',
        'ruleExecOut',
    ]
    raw_output = subprocess.check_output(command)
    return json.loads(raw_output)
|
||
|
||
def main():
    """Run one revision cleanup job.

    Parses the command line, takes the job-level lock, retrieves the revision
    information and hands it to clean_up() in batches of ``--batch-size``
    items. Every item is processed, including a final batch that is smaller
    than the batch size. Progress messages are printed only in verbose mode.
    """
    args = get_args()

    # A batch size below 1 would never make progress through the list.
    if args.batch_size < 1:
        print('Not starting job: batch size must be at least 1')
        sys.exit(1)

    lock_or_die()
    revisions_info = get_revisions_info()

    if args.verbose:
        print('START cleaning up revision store')

    # The rule takes the verbose setting as a "1"/"0" string argument.
    verbose_flag = "1" if args.verbose else "0"

    # Step through the list so the last (possibly partial) batch is included;
    # an empty list results in no clean_up() calls at all.
    for start in range(0, len(revisions_info), args.batch_size):
        batch = revisions_info[start:start + args.batch_size]
        if args.verbose:
            print("Clean up for " + str(batch))
        clean_up(batch, args.bucketcase, args.endofcalendarday, verbose_flag)

    if args.verbose:
        print('END cleaning up revision store')
||
revisions_info = json.loads(subprocess.check_output([ | ||
'irule', | ||
'-r', | ||
'irods_rule_engine_plugin-irods_rule_language-instance', | ||
'*out=""; rule_revisions_info(*out); writeString("stdout", *out);', | ||
'null', | ||
'ruleExecOut' | ||
])) | ||
|
||
while len(revisions_info) > 100: | ||
clean_up(revisions_info[:100]) | ||
revisions_info = revisions_info[100:] | ||
print(clean_up(revisions_info)) | ||
# Only start the job when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()