Skip to content

Commit

Permalink
Merge pull request #144 from superphy/43-bulk-uploading
Browse files Browse the repository at this point in the history
FIX: edge cases in bulk uploading
  • Loading branch information
kevinkle authored Jul 5, 2017
2 parents 75bb2a9 + 43e242b commit e664bf0
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 13 deletions.
53 changes: 41 additions & 12 deletions app/modules/spfy.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,20 @@ def ectyper_pipeline(singles, multiples):
job_ectyper = singles.enqueue(
call_ectyper, single_dict, depends_on=job_id)
# after this call, the result is stored in Blazegraph
job_ectyper_datastruct = multiples.enqueue(
datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper)
# new to 4.3.3
# if bulk uploading is set, we return the datastruct as the end task
# to poll for job completion, therefore must set ttl of -1
if single_dict['options']['bulk']:
job_ectyper_datastruct = multiples.enqueue(
datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper, result_ttl=-1)
else:
job_ectyper_datastruct = multiples.enqueue(
datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper)
d = {'job_ectyper': job_ectyper,
'job_ectyper_datastruct': job_ectyper_datastruct}
# only bother parsing into json if user has requested either vf or
# serotype
if single_dict['options']['vf'] or single_dict['options']['serotype']:
if (single_dict['options']['vf'] or single_dict['options']['serotype']) and not single_dict['options']['bulk']:
job_ectyper_beautify = multiples.enqueue(
beautify, single_dict, query_file + '_ectyper.p', depends_on=job_ectyper, result_ttl=-1)
d.update({'job_ectyper_beautify': job_ectyper_beautify})
Expand All @@ -86,7 +93,8 @@ def ectyper_pipeline(singles, multiples):
ectyper_jobs = ectyper_pipeline(singles_q, multiples_q)
job_ectyper = ectyper_jobs['job_ectyper']
job_ectyper_datastruct = ectyper_jobs['job_ectyper_datastruct']
job_ectyper_beautify = ectyper_jobs['job_ectyper_beautify']
if not single_dict['options']['bulk']:
job_ectyper_beautify = ectyper_jobs['job_ectyper_beautify']
# or if the backlog queue is enabled
elif config.BACKLOG_ENABLED:
# we need to create a dict with these options enabled:
Expand All @@ -101,15 +109,19 @@ def amr_pipeline(multiples):
job_amr_dict = multiples.enqueue(
amr_to_dict, query_file + '_rgi.tsv', depends_on=job_amr)
# this uploads result to blazegraph
job_amr_datastruct = multiples.enqueue(
datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict)
if single_dict['options']['bulk']:
job_amr_datastruct = multiples.enqueue(
datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict, result_ttl=-1)
else:
job_amr_datastruct = multiples.enqueue(
datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict)
d = {'job_amr': job_amr, 'job_amr_dict': job_amr_dict,
'job_amr_datastruct': job_amr_datastruct}
# we still check for the user-selected amr option again because
# if it was not selected but BACKLOG_ENABLED=True, we dont have to
# enqueue it to backlog_multiples_q since beautify doesnt upload
# blazegraph
if single_dict['options']['amr']:
if single_dict['options']['amr'] and not single_dict['options']['bulk']:
job_amr_beautify = multiples.enqueue(
beautify, single_dict, query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict, result_ttl=-1)
d.update({'job_amr_beautify': job_amr_beautify})
Expand All @@ -120,7 +132,8 @@ def amr_pipeline(multiples):
job_amr = amr_jobs['job_amr']
job_amr_dict = amr_jobs['job_amr_dict']
job_amr_datastruct = amr_jobs['job_amr_datastruct']
job_amr_beautify = amr_jobs['job_amr_beautify']
if not single_dict['options']['bulk']:
job_amr_beautify = amr_jobs['job_amr_beautify']
elif config.BACKLOG_ENABLED:
amr_pipeline(backlog_multiples_q)
# END AMR PIPELINE
Expand All @@ -133,11 +146,27 @@ def amr_pipeline(multiples):
'analysis': 'Quality Control'}
jobs[job_id.get_id()] = {'file': single_dict['i'],
'analysis': 'ID Reservation'}
if single_dict['options']['vf'] or single_dict['options']['serotype']:
jobs[job_ectyper_beautify.get_id()] = {'file': single_dict[

# new to 4.3.3 if bulk ids used return the endpoint of datastruct generation
# to poll for completion of all jobs
# these two ifs handle the case where amr (or vf or serotype) might not
# be selected but bulk is
if (single_dict['options']['vf'] or single_dict['options']['serotype']):
ret_job_ectyper = job_ectyper_datastruct
if single_dict['options']['amr']:
ret_job_amr = job_amr_datastruct
# if bulk uploading isnt used, return the beautify result as the final task
if not single_dict['options']['bulk']:
if (single_dict['options']['vf'] or single_dict['options']['serotype']):
ret_job_ectyper = job_ectyper_beautify
if single_dict['options']['amr']:
ret_job_amr = job_amr_beautify
# add the jobs to the return
if (single_dict['options']['vf'] or single_dict['options']['serotype']):
jobs[ret_job_ectyper.get_id()] = {'file': single_dict[
'i'], 'analysis': 'Virulence Factors and Serotype'}
if single_dict['options']['amr']:
jobs[job_amr_beautify.get_id()] = {'file': single_dict[
jobs[ret_job_amr.get_id()] = {'file': single_dict[
'i'], 'analysis': 'Antimicrobial Resistance'}

return jobs
Expand Down Expand Up @@ -165,7 +194,7 @@ def spfy(args_dict):
# abs path resolution should be handled in spfy.py
#args_dict['i'] = os.path.abspath(args_dict['i'])

#print 'Starting blob_savvy call'
# print 'Starting blob_savvy call'
jobs_dict = blob_savvy(args_dict)

return jobs_dict
6 changes: 6 additions & 0 deletions app/routes/ra_posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ def upload():
# we consider False as default as the front-end should override this
# to use the new feature
groupresults = False
# new to 4.3.3
# allows bulk uploading where results are not returned to user
# only the blobid to check statuses is returned (ie. don't run beautify)
options['bulk'] = False

# processing form data
for key, value in form.items():
Expand All @@ -193,6 +197,8 @@ def upload():
options['serotype']=value
if key == 'options.groupresults':
groupresults = value
if key == 'options.bulk':
options['bulk'] = value
else:
if key =='options.pi':
options['pi']=int(value)
Expand Down
2 changes: 1 addition & 1 deletion reactapp

0 comments on commit e664bf0

Please sign in to comment.