Skip to content

Commit

Permalink
clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
saijananiganesan committed Aug 17, 2020
1 parent b5766c2 commit 3623842
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 73 deletions.
101 changes: 65 additions & 36 deletions master/pyext/src/validation/Report.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ def __init__(self,mmcif_file):
self.I=get_input_information(self.mmcif_file)

def run_entry_composition(self,Template_Dict):
'''
get entry composition, relies on IHM library
'''
start=time.process_time()
name=self.mmcif_file.split('.')[0].split('_')[0]
if self.I.get_ensembles():
Expand Down Expand Up @@ -68,26 +71,31 @@ def run_entry_composition(self,Template_Dict):
return Template_Dict

def run_model_quality(self,Template_Dict):
'''
get excluded volume for multiscale models
get molprobity info for atomic models
exception: models with DNA--we need a way to assess models with DNA
'''
print ("exo",self.I.check_sphere())
if self.I.check_sphere()<1:
#global clashscore; global rama; global sidechain;
exv_data=None
I_mp=molprobity.get_molprobity_information(self.mmcif_file)
if I_mp.check_for_molprobity():
filename = os.path.abspath(os.path.join(os.getcwd(), 'Output/results/',str(Template_Dict['ID'])+'_temp_mp.txt'))
filename = os.path.abspath(os.path.join(os.getcwd(), 'static/results/',str(Template_Dict['ID'])+'_temp_mp.txt'))
print (filename)
if os.path.exists(filename):
d_mp={}
print ("Molprobity analysis file already exists...\n...assuming clashscores, Ramachandran and rotamer outliers have already been calculated")
with open(filename,'rb') as fp:
d_mp['molprobity']=pickle.load(fp)
f_rota=os.path.abspath(os.path.join(os.getcwd(), 'Output/results/',str(Template_Dict['ID'])+'_temp_rota.txt'))
f_rota=os.path.abspath(os.path.join(os.getcwd(), 'static/results/',str(Template_Dict['ID'])+'_temp_rota.txt'))
with open(f_rota,'rb') as fp:
d_mp['rota']=pickle.load(fp)
f_rama=os.path.abspath(os.path.join(os.getcwd(), 'Output/results/',str(Template_Dict['ID'])+'_temp_rama.txt'))
f_rama=os.path.abspath(os.path.join(os.getcwd(), 'static/results/',str(Template_Dict['ID'])+'_temp_rama.txt'))
with open(f_rama,'rb') as fp:
d_mp['rama']=pickle.load(fp)
f_clash=os.path.abspath(os.path.join(os.getcwd(), 'Output/results/',str(Template_Dict['ID'])+'_temp_clash.txt'))
f_clash=os.path.abspath(os.path.join(os.getcwd(), 'static/results/',str(Template_Dict['ID'])+'_temp_clash.txt'))
with open(f_clash,'rb') as fp:
d_mp['clash']=pickle.load(fp)
else:
Expand All @@ -112,26 +120,35 @@ def run_model_quality(self,Template_Dict):
Template_Dict['assess_atomic_segments']='Clashscore: '+ str(clashscore) + ', Ramachandran outliers: '+ str(rama)+ '% '+', Sidechain outliers: '+str(sidechain)+'%'
Template_Dict['assess_excluded_volume']=['Not applicable']
else:
self.I.rewrite_mmcif()
if I_mp.check_for_molprobity()==False:
self.I.rewrite_mmcif()
I_mp=molprobity.get_molprobity_information('test.cif')
print ("file rewritten")
if I_mp.check_for_molprobity():
print ("Molprobity analysis is being calculated...")
manager = Manager()
d_mp=manager.dict()
runInParallel(I_mp.run_clashscore(d_mp),I_mp.run_ramalyze(d_mp),I_mp.run_rotalyze(d_mp),I_mp.run_molprobity(d_mp))
a,b=I_mp.process_molprobity(d_mp['molprobity'])
Template_Dict['bond']=len(a); Template_Dict['angle']=len(b)
clashscore,rama,sidechain=I_mp.get_data_for_quality_at_glance(d_mp['molprobity'])
Template_Dict['molp_b']=utility.dict_to_JSlist(I_mp.molprobity_detailed_table_bonds(a))
Template_Dict['molp_a']=utility.dict_to_JSlist(I_mp.molprobity_detailed_table_angles(b))
Template_Dict['rotascore']=utility.dict_to_JSlist(I_mp.rota_summary_table(I_mp.process_rota(d_mp['rota'])))
Template_Dict['rotalist']=utility.dict_to_JSlist(I_mp.rota_detailed_table(I_mp.process_rota(d_mp['rota'])))
Template_Dict['ramascore']=utility.dict_to_JSlist(I_mp.rama_summary_table(I_mp.process_rama(d_mp['rama'])))
Template_Dict['ramalist']=utility.dict_to_JSlist(I_mp.rama_detailed_table(I_mp.process_rama(d_mp['rama'])))
clashscores,Template_Dict['tot']=I_mp.clash_summary_table(d_mp['clash'])
Template_Dict['clashscore_list']=utility.dict_to_JSlist(clashscores)
Template_Dict['clashlist']=I_mp.clash_detailed_table(d_mp['clash'])
Template_Dict['assess_atomic_segments']='Clashscore: '+ str(clashscore) + ', Ramachandran outliers: '+ str(rama)+ '% '+', Sidechain outliers: '+str(sidechain)+'%'
Template_Dict['assess_excluded_volume']=['Not applicable']
try:
runInParallel(I_mp.run_clashscore(d_mp),I_mp.run_ramalyze(d_mp),I_mp.run_rotalyze(d_mp),I_mp.run_molprobity(d_mp))
a,b=I_mp.process_molprobity(d_mp['molprobity'])
Template_Dict['bond']=len(a); Template_Dict['angle']=len(b)
clashscore,rama,sidechain=I_mp.get_data_for_quality_at_glance(d_mp['molprobity'])
Template_Dict['molp_b']=utility.dict_to_JSlist(I_mp.molprobity_detailed_table_bonds(a))
Template_Dict['molp_a']=utility.dict_to_JSlist(I_mp.molprobity_detailed_table_angles(b))
Template_Dict['rotascore']=utility.dict_to_JSlist(I_mp.rota_summary_table(I_mp.process_rota(d_mp['rota'])))
Template_Dict['rotalist']=utility.dict_to_JSlist(I_mp.rota_detailed_table(I_mp.process_rota(d_mp['rota'])))
Template_Dict['ramascore']=utility.dict_to_JSlist(I_mp.rama_summary_table(I_mp.process_rama(d_mp['rama'])))
Template_Dict['ramalist']=utility.dict_to_JSlist(I_mp.rama_detailed_table(I_mp.process_rama(d_mp['rama'])))
clashscores,Template_Dict['tot']=I_mp.clash_summary_table(d_mp['clash'])
Template_Dict['clashscore_list']=utility.dict_to_JSlist(clashscores)
Template_Dict['clashlist']=I_mp.clash_detailed_table(d_mp['clash'])
Template_Dict['assess_atomic_segments']='Clashscore: '+ str(clashscore) + ', Ramachandran outliers: '+ str(rama)+ '% '+', Sidechain outliers: '+str(sidechain)+'%'
Template_Dict['assess_excluded_volume']=['Not applicable']
except:
print ("Molprobity cannot be calculated...")
clashscore=None
rama=None
sidechain=None
else:
Template_Dict['assess_atomic_segments']='Not applicable'
file=os.getcwd()+'Output/results/'+str(Template_Dict['ID'])+'exv.txt'
Expand All @@ -154,15 +171,22 @@ def run_model_quality(self,Template_Dict):
return Template_Dict,clashscore,rama,sidechain,exv_data

def run_sas_validation(self,Template_Dict):
#global sas_data; global sas_fit;
'''
get sas validation information from SASCIF or JSON files
'''
if self.I.check_for_sas(self.I.get_dataset_comp()):
Template_Dict['sas']=["True"]
I_sas=sas.sas_validation(self.mmcif_file)
Template_Dict['p_val']=utility.dict_to_JSlist(I_sas.get_pvals())
Template_Dict['sasdb_code']=I_sas.get_SASBDB_code()
Template_Dict['parameters_volume']=utility.dict_to_JSlist(I_sas.get_parameters_vol_many())
Template_Dict['parameters_mw']=utility.dict_to_JSlist(I_sas.get_parameters_mw_many())
#Template_Dict['parameters_mw_sascif']=utility.dict_to_JSlist(I_sas.get_mw_from_sascif())
try:
Template_Dict['parameters_volume']=utility.dict_to_JSlist(I_sas.get_parameters_vol_many())
except:
Template_Dict['parameters_volume']=utility.dict_to_JSlist(I_sas.get_parameters_vol_many_dep())
try:
Template_Dict['parameters_mw']=utility.dict_to_JSlist(I_sas.get_parameters_mw_many())
except:
Template_Dict['parameters_mw']=utility.dict_to_JSlist(I_sas.get_parameters_mw_many_dep())
Template_Dict['pddf_info']=utility.dict_to_JSlist(I_sas.get_pddf_info())
Template_Dict['number_of_fits']=I_sas.get_total_fits()
Template_Dict['chi_table']=utility.dict_to_JSlist(I_sas.get_chi_table())
Expand All @@ -172,25 +196,28 @@ def run_sas_validation(self,Template_Dict):
Template_Dict['validation_input']=utility.get_rg_data_fits(I_sas.get_fits_for_plot())
if len(Template_Dict['validation_input'])<1:
Template_Dict['validation_input']=['Fit of model to data has not been deposited']
I_sas_plt=validation.sas_plots.sas_validation_plots(self.mmcif_file)
I_sas.modify_intensity()
I_sas.get_pofr_errors()
I_sas_plt.plot_multiple()
I_sas_plt.plot_pf()
I_sas_plt.plot_Guinier()
if Template_Dict['number_of_fits']>0:
I_sas_plt.plot_fits()
#I_sas_plt.plot_residuals()
#I_sas.get_fit_image()
try:
I_sas_plt=validation.sas_plots.sas_validation_plots(self.mmcif_file)
I_sas.modify_intensity()
I_sas.get_pofr_errors()
I_sas_plt.plot_multiple()
I_sas_plt.plot_pf()
I_sas_plt.plot_Guinier()
if Template_Dict['number_of_fits']>0:
I_sas_plt.plot_fits()
except:
pass
sas_data=I_sas.get_rg_for_plot()
sas_fit=I_sas.get_fits_for_plot()

else:
sas_data={}
sas_fit={}
return Template_Dict,sas_data,sas_fit

def run_quality_glance(self,clashscore,rama,sidechain,exv_data,sas_data,sas_fit):
'''
get quality at glance image; will be updated as validation report is updated
'''
I_plt=get_plots.plots(self.mmcif_file)
I_plt.plot_quality_at_glance(clashscore,rama,sidechain,exv_data,sas_data,sas_fit)

Expand All @@ -205,7 +232,9 @@ def run_supplementary_table(self,
Data_quality=['-'],
clustering='N/A',
resolution='N/A'):

'''
get supplementary table, will be updated as validation report is updated
'''
if (self.I.get_ensembles() is not None) and (utility.all_same(self.I.get_ensembles()['Clustering method'])):
Template_Dict['clustering']=self.I.get_ensembles()['Clustering method'][0]
elif self.I.get_ensembles() is not None:
Expand Down
6 changes: 3 additions & 3 deletions master/pyext/src/validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,11 @@ def mmcif_get_lists(self,filetemp=None):
if len(j)<=index_occu:
j.extend(['1'])
elif j[index_occu]=='.':
j[index_occu]='1'
j[index_occu]='0.67'
if len(j)<=index_biso:
j.extend(['1'])
elif j[index_biso]=='.':
j[index_biso]='1'
j[index_biso]='0.00'
atoms[i]=j
elif len(j)> 0 and (i > list(atom_site.keys())[-1]):
if len(after_atom)==0:
Expand All @@ -536,7 +536,7 @@ def rewrite_mmcif(self):
if os.path.isfile('test.cif'):
os.remove('test.cif')
file_re=open('test.cif','w')
for i, j in enumerate(before_atom_site):
for i, j in enumerate(before_atom_site[:-1]):
file_re.write(' '.join(j)+'\n')
for i, j in atom_site.items():
file_re.write(''.join(j)+'\n')
Expand Down
9 changes: 8 additions & 1 deletion master/pyext/src/validation/get_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ def __init__(self,mmcif):
self.filename_add = os.path.join('static/images//')

def plot_quality_at_glance(self,clashscore,rama,sidechain,exv_data,sas_data,sas_fit):
'''
plot quality at glance with multiple tabs
will be updated as validation report is updated
'''
output_file(self.ID+"quality_at_glance.html",mode="inline")
#print (clashscore,rama,sidechain,exv_data,sas_data,sas_fit)
tabsI=[]
if clashscore or rama or sidechain:
counts=[clashscore,rama,sidechain]
Expand All @@ -56,6 +59,10 @@ def plot_quality_at_glance(self,clashscore,rama,sidechain,exv_data,sas_data,sas_
p = figure(y_range=Scores, x_range=(lower,upper), plot_height=250, plot_width=700, title='Model quality: Excluded Volume Analysis')
p.hbar(y='Scores',right='counts', height=0.5, color='color', legend="legends", source=source,alpha=0.8)
p.xaxis.axis_label = 'Number of violations'
else:
Scores=[''];counts = [''];legends=['']
source = ColumnDataSource(data=dict(Scores=Scores, counts=counts, legends=legends))
p = figure(y_range=Scores, x_range=(0,1), plot_height=250, plot_width=700)

Scores=[''];counts = [''];legends=['']
source = ColumnDataSource(data=dict(Scores=Scores, counts=counts, legends=legends))
Expand Down
3 changes: 0 additions & 3 deletions master/pyext/src/validation/sas.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,10 +516,8 @@ def get_parameters_mw_many(self):
else:
parameter_table['Porod Volume/MW'].append('N/A')


return parameter_table


def get_parameters_mw_many_dep(self):
'''
deprecated function for getting MW from JSON
Expand Down Expand Up @@ -692,7 +690,6 @@ def get_total_fits(self):
num += len(val['fits'])
return num


def get_fit_image(self):
'''
get fit image from fit, deprecated
Expand Down
45 changes: 42 additions & 3 deletions master/pyext/src/validation/sas_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def __init__(self,mmcif_file):
self.filename_add = os.path.join('static/images//')

def plot_intensities(self,sasbdb,df):
'''
plot intensities with errors
'''
output_file(self.ID+sasbdb+"intensities.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Log I(q) vs q with error bars ("+sasbdb+")")
Expand All @@ -57,6 +60,9 @@ def plot_intensities(self,sasbdb,df):


def plot_intensities_log(self,sasbdb,df):
'''
plot intensities on a log scale with errors
'''
output_file(self.ID+sasbdb+"intensities_log.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Log I(q) vs Log q with error bars ("+sasbdb+")")
Expand All @@ -78,13 +84,13 @@ def plot_intensities_log(self,sasbdb,df):
export_svgs(p,height=500, width=500,filename=self.filename_add+'/'+self.ID+sasbdb+"intensities_log.svg")

def plot_kratky_dep(self,sasbdb,df):
'''
plot kratky plot, deprecated function
'''
output_file(self.ID+sasbdb+"Kratky_dep.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Kratky plot ("+sasbdb+")")
p.circle(x='Q',y='Ky',source=source,color='blue',fill_alpha=0.3,size=5)
#vline = Span(location=0.1732, dimension='height', line_color='red', line_width=3)
#hline = Span(location=0.1104, dimension='width', line_color='green', line_width=3)
#p.renderers.extend([vline, hline])
p.xaxis.major_label_text_font_size="14pt"
p.yaxis.major_label_text_font_size="14pt"
p.title.text_font_size='12pt'
Expand All @@ -101,6 +107,9 @@ def plot_kratky_dep(self,sasbdb,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"Kratky_dep.svg")

def plot_kratky(self,sasbdb,df):
'''
plot dimensionless kratky
'''
output_file(self.ID+sasbdb+"Kratky.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Dimensionless Kratky plot ("+sasbdb+")")
Expand All @@ -124,6 +133,9 @@ def plot_kratky(self,sasbdb,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"Kratky.svg")

def plot_porod_debye(self,sasbdb,df):
'''
porod debye plot for flexibility
'''
output_file(self.ID+sasbdb+"porod.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Porod-Debye plot ("+sasbdb+")")
Expand All @@ -144,6 +156,9 @@ def plot_porod_debye(self,sasbdb,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"porod.svg")

def plot_pddf(self,sasbdb,df):
'''
p(r) plot, deprecated function
'''
output_file(self.ID+sasbdb+"pddf.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Pair distance distribution function ("+sasbdb+")")
Expand All @@ -165,6 +180,9 @@ def plot_pddf(self,sasbdb,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"pddf.svg")

def plot_pddf_residuals(self,sasbdb,df):
'''
p(r) residuals
'''
output_file(self.ID+sasbdb+"pddf_residuals.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Residuals for P(r) fit ("+sasbdb+")")
Expand All @@ -187,6 +205,9 @@ def plot_pddf_residuals(self,sasbdb,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"pddf_residuals.svg")

def plot_pddf_residuals_wt(self,sasbdb,df):
'''
p(r) error weighted residuals
'''
output_file(self.ID+sasbdb+"pddf_residuals_wt.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Error weighted residuals for P(r) fit ("+sasbdb+")")
Expand All @@ -209,6 +230,9 @@ def plot_pddf_residuals_wt(self,sasbdb,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"pddf_residuals_wt.svg")

def plot_pddf_int(self,sasbdb,df_int,df_pofr):
'''
p(r) with fit
'''
output_file(self.ID+sasbdb+"pddf_int.html",mode="inline")
source1 = ColumnDataSource(df_int)
source2=ColumnDataSource(df_pofr)
Expand All @@ -235,6 +259,9 @@ def plot_pddf_int(self,sasbdb,df_int,df_pofr):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"pddf_int.svg")

def Guinier_plot_fit(self,sasbdb,df,score):
'''
Guinier plot with fit
'''
output_file(self.ID+sasbdb+"guinier.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Guinier plot for "+sasbdb+" (R\u00B2="+str(score)+")")
Expand All @@ -259,6 +286,9 @@ def Guinier_plot_fit(self,sasbdb,df,score):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"guinier.svg")

def Guinier_plot_residuals(self,sasbdb,df):
'''
Guinier plot residuals
'''
output_file(self.ID+sasbdb+"guinier_residuals.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Residuals for Guinier plot fit ("+sasbdb+")")
Expand All @@ -281,6 +311,9 @@ def Guinier_plot_residuals(self,sasbdb,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"guinier_residuals.svg")

def plot_fit(self,sasbdb,fit,score,df):
'''
plot chi-squared fit
'''
output_file(self.ID+sasbdb+str(fit)+"fit1.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Model fit for "+sasbdb)
Expand All @@ -306,6 +339,9 @@ def plot_fit(self,sasbdb,fit,score,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+str(fit)+"fit1.svg")

def plot_fit_residuals(self,sasbdb,fit,df):
'''
plot residuals for each fit
'''
output_file(self.ID+sasbdb+str(fit)+"residuals.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Residuals for model fit ("+sasbdb+")")
Expand All @@ -328,6 +364,9 @@ def plot_fit_residuals(self,sasbdb,fit,df):
export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+str(fit)+"residuals.svg")

def plot_fit_residuals_wt(self,sasbdb,fit,df):
'''
plot error weighted residuals for each fit
'''
output_file(self.ID+sasbdb+str(fit)+"residuals_wt.html",mode="inline")
source = ColumnDataSource(df)
p = figure(plot_height=500, plot_width=500, title="Error-weighted residuals for model fit ("+sasbdb+")")
Expand Down
Loading

0 comments on commit 3623842

Please sign in to comment.