clean up

salilab · Aug 17, 2020 · 3623842 · 3623842
1 parent b5766c2
commit 3623842
Show file tree

Hide file tree

Showing 6 changed files with 129 additions and 73 deletions.
diff --git a/master/pyext/src/validation/Report.py b/master/pyext/src/validation/Report.py
@@ -32,6 +32,9 @@ def __init__(self,mmcif_file):
 		self.I=get_input_information(self.mmcif_file)
 
 	def run_entry_composition(self,Template_Dict):
+		'''
+		get entry composition, relies on IHM library
+		'''
 		start=time.process_time()
 		name=self.mmcif_file.split('.')[0].split('_')[0]
 		if self.I.get_ensembles():
@@ -68,26 +71,31 @@ def run_entry_composition(self,Template_Dict):
 		return Template_Dict
 
 	def run_model_quality(self,Template_Dict):
+		'''
+		get excluded volume for multiscale models
+		get molprobity info for atomic models
+		exception: models with DNA--we need a way to assess models with DNA
+		'''
 		print ("exo",self.I.check_sphere())
 		if self.I.check_sphere()<1:
 			#global clashscore; global rama; global sidechain;
 			exv_data=None
 			I_mp=molprobity.get_molprobity_information(self.mmcif_file)
 			if I_mp.check_for_molprobity():
-				filename = os.path.abspath(os.path.join(os.getcwd(), 'Output/results/',str(Template_Dict['ID'])+'_temp_mp.txt'))
+				filename = os.path.abspath(os.path.join(os.getcwd(), 'static/results/',str(Template_Dict['ID'])+'_temp_mp.txt'))
 				print (filename)
 				if os.path.exists(filename):
 					d_mp={}
 					print ("Molprobity analysis file already exists...\n...assuming clashscores, Ramachandran and rotamer outliers have already been calculated")
 					with open(filename,'rb') as fp:
 						d_mp['molprobity']=pickle.load(fp)
-					f_rota=os.path.abspath(os.path.join(os.getcwd(), 'Output/results/',str(Template_Dict['ID'])+'_temp_rota.txt'))
+					f_rota=os.path.abspath(os.path.join(os.getcwd(), 'static/results/',str(Template_Dict['ID'])+'_temp_rota.txt'))
 					with open(f_rota,'rb') as fp:
 						d_mp['rota']=pickle.load(fp)
-					f_rama=os.path.abspath(os.path.join(os.getcwd(), 'Output/results/',str(Template_Dict['ID'])+'_temp_rama.txt'))
+					f_rama=os.path.abspath(os.path.join(os.getcwd(), 'static/results/',str(Template_Dict['ID'])+'_temp_rama.txt'))
 					with open(f_rama,'rb') as fp:
 						d_mp['rama']=pickle.load(fp)
-					f_clash=os.path.abspath(os.path.join(os.getcwd(), 'Output/results/',str(Template_Dict['ID'])+'_temp_clash.txt'))
+					f_clash=os.path.abspath(os.path.join(os.getcwd(), 'static/results/',str(Template_Dict['ID'])+'_temp_clash.txt'))
 					with open(f_clash,'rb') as fp:
 						d_mp['clash']=pickle.load(fp)
 				else:
@@ -112,26 +120,35 @@ def run_model_quality(self,Template_Dict):
 				Template_Dict['assess_atomic_segments']='Clashscore: '+ str(clashscore) + ', Ramachandran outliers: '+ str(rama)+ '% '+', Sidechain outliers: '+str(sidechain)+'%'
 				Template_Dict['assess_excluded_volume']=['Not applicable']
 			else:
-				self.I.rewrite_mmcif()
+				if I_mp.check_for_molprobity()==False:
+					self.I.rewrite_mmcif()
+					I_mp=molprobity.get_molprobity_information('test.cif')
+					print ("file rewritten")
 				if I_mp.check_for_molprobity():
 					print ("Molprobity analysis is being calculated...")
 					manager = Manager()
 					d_mp=manager.dict()
-					runInParallel(I_mp.run_clashscore(d_mp),I_mp.run_ramalyze(d_mp),I_mp.run_rotalyze(d_mp),I_mp.run_molprobity(d_mp))
-					a,b=I_mp.process_molprobity(d_mp['molprobity'])
-					Template_Dict['bond']=len(a); Template_Dict['angle']=len(b)
-					clashscore,rama,sidechain=I_mp.get_data_for_quality_at_glance(d_mp['molprobity'])
-					Template_Dict['molp_b']=utility.dict_to_JSlist(I_mp.molprobity_detailed_table_bonds(a))
-					Template_Dict['molp_a']=utility.dict_to_JSlist(I_mp.molprobity_detailed_table_angles(b))
-					Template_Dict['rotascore']=utility.dict_to_JSlist(I_mp.rota_summary_table(I_mp.process_rota(d_mp['rota'])))
-					Template_Dict['rotalist']=utility.dict_to_JSlist(I_mp.rota_detailed_table(I_mp.process_rota(d_mp['rota'])))
-					Template_Dict['ramascore']=utility.dict_to_JSlist(I_mp.rama_summary_table(I_mp.process_rama(d_mp['rama'])))
-					Template_Dict['ramalist']=utility.dict_to_JSlist(I_mp.rama_detailed_table(I_mp.process_rama(d_mp['rama'])))
-					clashscores,Template_Dict['tot']=I_mp.clash_summary_table(d_mp['clash'])
-					Template_Dict['clashscore_list']=utility.dict_to_JSlist(clashscores)
-					Template_Dict['clashlist']=I_mp.clash_detailed_table(d_mp['clash'])
-					Template_Dict['assess_atomic_segments']='Clashscore: '+ str(clashscore) + ', Ramachandran outliers: '+ str(rama)+ '% '+', Sidechain outliers: '+str(sidechain)+'%'
-					Template_Dict['assess_excluded_volume']=['Not applicable']
+					try:
+						runInParallel(I_mp.run_clashscore(d_mp),I_mp.run_ramalyze(d_mp),I_mp.run_rotalyze(d_mp),I_mp.run_molprobity(d_mp))
+						a,b=I_mp.process_molprobity(d_mp['molprobity'])
+						Template_Dict['bond']=len(a); Template_Dict['angle']=len(b)
+						clashscore,rama,sidechain=I_mp.get_data_for_quality_at_glance(d_mp['molprobity'])
+						Template_Dict['molp_b']=utility.dict_to_JSlist(I_mp.molprobity_detailed_table_bonds(a))
+						Template_Dict['molp_a']=utility.dict_to_JSlist(I_mp.molprobity_detailed_table_angles(b))
+						Template_Dict['rotascore']=utility.dict_to_JSlist(I_mp.rota_summary_table(I_mp.process_rota(d_mp['rota'])))
+						Template_Dict['rotalist']=utility.dict_to_JSlist(I_mp.rota_detailed_table(I_mp.process_rota(d_mp['rota'])))
+						Template_Dict['ramascore']=utility.dict_to_JSlist(I_mp.rama_summary_table(I_mp.process_rama(d_mp['rama'])))
+						Template_Dict['ramalist']=utility.dict_to_JSlist(I_mp.rama_detailed_table(I_mp.process_rama(d_mp['rama'])))
+						clashscores,Template_Dict['tot']=I_mp.clash_summary_table(d_mp['clash'])
+						Template_Dict['clashscore_list']=utility.dict_to_JSlist(clashscores)
+						Template_Dict['clashlist']=I_mp.clash_detailed_table(d_mp['clash'])
+						Template_Dict['assess_atomic_segments']='Clashscore: '+ str(clashscore) + ', Ramachandran outliers: '+ str(rama)+ '% '+', Sidechain outliers: '+str(sidechain)+'%'
+						Template_Dict['assess_excluded_volume']=['Not applicable']
+					except:
+						print ("Molprobity cannot be calculated...")
+						clashscore=None
+						rama=None
+						sidechain=None                
 		else:
 			Template_Dict['assess_atomic_segments']='Not applicable'
 			file=os.getcwd()+'Output/results/'+str(Template_Dict['ID'])+'exv.txt'
@@ -154,15 +171,22 @@ def run_model_quality(self,Template_Dict):
 		return Template_Dict,clashscore,rama,sidechain,exv_data
 
 	def run_sas_validation(self,Template_Dict):
-		#global sas_data; global sas_fit;
+		'''
+		get sas validation information from SASCIF or JSON files
+		'''
 		if self.I.check_for_sas(self.I.get_dataset_comp()):
 			Template_Dict['sas']=["True"]
 			I_sas=sas.sas_validation(self.mmcif_file)
 			Template_Dict['p_val']=utility.dict_to_JSlist(I_sas.get_pvals())
 			Template_Dict['sasdb_code']=I_sas.get_SASBDB_code()
-			Template_Dict['parameters_volume']=utility.dict_to_JSlist(I_sas.get_parameters_vol_many())
-			Template_Dict['parameters_mw']=utility.dict_to_JSlist(I_sas.get_parameters_mw_many())
-			#Template_Dict['parameters_mw_sascif']=utility.dict_to_JSlist(I_sas.get_mw_from_sascif())
+			try:		
+				Template_Dict['parameters_volume']=utility.dict_to_JSlist(I_sas.get_parameters_vol_many())
+			except:
+				Template_Dict['parameters_volume']=utility.dict_to_JSlist(I_sas.get_parameters_vol_many_dep())
+			try:
+				Template_Dict['parameters_mw']=utility.dict_to_JSlist(I_sas.get_parameters_mw_many())
+			except:
+				Template_Dict['parameters_mw']=utility.dict_to_JSlist(I_sas.get_parameters_mw_many_dep())
 			Template_Dict['pddf_info']=utility.dict_to_JSlist(I_sas.get_pddf_info())
 			Template_Dict['number_of_fits']=I_sas.get_total_fits()
 			Template_Dict['chi_table']=utility.dict_to_JSlist(I_sas.get_chi_table())
@@ -172,25 +196,28 @@ def run_sas_validation(self,Template_Dict):
 			Template_Dict['validation_input']=utility.get_rg_data_fits(I_sas.get_fits_for_plot())
 			if len(Template_Dict['validation_input'])<1:
 				Template_Dict['validation_input']=['Fit of model to data has not been deposited']
-			I_sas_plt=validation.sas_plots.sas_validation_plots(self.mmcif_file)
-			I_sas.modify_intensity()
-			I_sas.get_pofr_errors()	
-			I_sas_plt.plot_multiple()
-			I_sas_plt.plot_pf()
-			I_sas_plt.plot_Guinier()
-			if Template_Dict['number_of_fits']>0:
-				I_sas_plt.plot_fits()
-			#I_sas_plt.plot_residuals()
-			#I_sas.get_fit_image()
+			try:
+				I_sas_plt=validation.sas_plots.sas_validation_plots(self.mmcif_file)
+				I_sas.modify_intensity()
+				I_sas.get_pofr_errors()	
+				I_sas_plt.plot_multiple()
+				I_sas_plt.plot_pf()
+				I_sas_plt.plot_Guinier()
+				if Template_Dict['number_of_fits']>0:
+					I_sas_plt.plot_fits()
+			except:
+				pass
 			sas_data=I_sas.get_rg_for_plot()
 			sas_fit=I_sas.get_fits_for_plot()
-
 		else:
 			sas_data={}
 			sas_fit={}
 		return Template_Dict,sas_data,sas_fit
 
 	def run_quality_glance(self,clashscore,rama,sidechain,exv_data,sas_data,sas_fit):
+		'''
+		get quality at glance image; will be updated as validation report is updated
+		'''
 		I_plt=get_plots.plots(self.mmcif_file)
 		I_plt.plot_quality_at_glance(clashscore,rama,sidechain,exv_data,sas_data,sas_fit)
 
@@ -205,7 +232,9 @@ def run_supplementary_table(self,
 								Data_quality=['-'],
 								clustering='N/A',
 								resolution='N/A'):
-
+		'''
+		get supplementary table, will be updated as validation report is updated
+		'''
 		if (self.I.get_ensembles() is not None) and  (utility.all_same(self.I.get_ensembles()['Clustering method'])):
 			Template_Dict['clustering']=self.I.get_ensembles()['Clustering method'][0]
 		elif self.I.get_ensembles() is not None:

diff --git a/master/pyext/src/validation/__init__.py b/master/pyext/src/validation/__init__.py
@@ -518,11 +518,11 @@ def mmcif_get_lists(self,filetemp=None):
                 if len(j)<=index_occu:
                     j.extend(['1'])
                 elif j[index_occu]=='.':
-                    j[index_occu]='1'
+                    j[index_occu]='0.67'
                 if len(j)<=index_biso:
                     j.extend(['1'])
                 elif j[index_biso]=='.':
-                    j[index_biso]='1'
+                    j[index_biso]='0.00'
                 atoms[i]=j
             elif len(j)> 0 and  (i > list(atom_site.keys())[-1]):
                 if len(after_atom)==0:
@@ -536,7 +536,7 @@ def rewrite_mmcif(self):
         if os.path.isfile('test.cif'):
             os.remove('test.cif')
         file_re=open('test.cif','w')
-        for i, j in enumerate(before_atom_site):
+        for i, j in enumerate(before_atom_site[:-1]):
             file_re.write(' '.join(j)+'\n')
         for i, j in atom_site.items():
             file_re.write(''.join(j)+'\n')

diff --git a/master/pyext/src/validation/get_plots.py b/master/pyext/src/validation/get_plots.py
@@ -28,8 +28,11 @@ def __init__(self,mmcif):
 		self.filename_add = os.path.join('static/images//')
 
 	def plot_quality_at_glance(self,clashscore,rama,sidechain,exv_data,sas_data,sas_fit):
+		'''
+		plot quality at glance with multiple tabs
+		will be updated as validation report is updated
+		'''
 		output_file(self.ID+"quality_at_glance.html",mode="inline")
-		#print (clashscore,rama,sidechain,exv_data,sas_data,sas_fit)
 		tabsI=[]
 		if clashscore or rama or sidechain:
 			counts=[clashscore,rama,sidechain]
@@ -56,6 +59,10 @@ def plot_quality_at_glance(self,clashscore,rama,sidechain,exv_data,sas_data,sas_
 			p = figure(y_range=Scores, x_range=(lower,upper), plot_height=250, plot_width=700, title='Model quality: Excluded Volume Analysis')
 			p.hbar(y='Scores',right='counts', height=0.5, color='color', legend="legends", source=source,alpha=0.8)
 			p.xaxis.axis_label = 'Number of violations'
+		else:
+			Scores=[''];counts = [''];legends=['']
+			source = ColumnDataSource(data=dict(Scores=Scores, counts=counts, legends=legends))
+			p = figure(y_range=Scores, x_range=(0,1), plot_height=250, plot_width=700)
 
 		Scores=[''];counts = [''];legends=['']
 		source = ColumnDataSource(data=dict(Scores=Scores, counts=counts, legends=legends))

diff --git a/master/pyext/src/validation/sas.py b/master/pyext/src/validation/sas.py
@@ -516,10 +516,8 @@ def get_parameters_mw_many(self):
                     else:
                         parameter_table['Porod Volume/MW'].append('N/A')
 
-
         return parameter_table
 
-
     def get_parameters_mw_many_dep(self):
         '''
         depreciated function on getting MW from JSON
@@ -692,7 +690,6 @@ def get_total_fits(self):
             num += len(val['fits'])
         return num
 
-
     def get_fit_image(self):
         '''
         get fit image from fit, deprecated

diff --git a/master/pyext/src/validation/sas_plots.py b/master/pyext/src/validation/sas_plots.py
@@ -35,6 +35,9 @@ def __init__(self,mmcif_file):
         self.filename_add = os.path.join('static/images//')
 
     def plot_intensities(self,sasbdb,df):
+        '''
+        plot intensities with errors
+        '''
         output_file(self.ID+sasbdb+"intensities.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Log I(q) vs q with error bars ("+sasbdb+")")
@@ -57,6 +60,9 @@ def plot_intensities(self,sasbdb,df):
 
 
     def plot_intensities_log(self,sasbdb,df):
+        '''
+        plot intensities on a log scale with errors
+        '''
         output_file(self.ID+sasbdb+"intensities_log.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Log I(q) vs Log q with error bars ("+sasbdb+")")
@@ -78,13 +84,13 @@ def plot_intensities_log(self,sasbdb,df):
         export_svgs(p,height=500, width=500,filename=self.filename_add+'/'+self.ID+sasbdb+"intensities_log.svg")
 
     def plot_kratky_dep(self,sasbdb,df):
+        '''
+        plot kratky plot, deprecated function
+        '''
         output_file(self.ID+sasbdb+"Kratky_dep.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Kratky plot ("+sasbdb+")")
         p.circle(x='Q',y='Ky',source=source,color='blue',fill_alpha=0.3,size=5)
-        #vline = Span(location=0.1732, dimension='height', line_color='red', line_width=3)
-        #hline = Span(location=0.1104, dimension='width', line_color='green', line_width=3)
-        #p.renderers.extend([vline, hline])
         p.xaxis.major_label_text_font_size="14pt"
         p.yaxis.major_label_text_font_size="14pt"
         p.title.text_font_size='12pt'
@@ -101,6 +107,9 @@ def plot_kratky_dep(self,sasbdb,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"Kratky_dep.svg")
 
     def plot_kratky(self,sasbdb,df):
+        '''
+        plot dimensionless kratky 
+        '''
         output_file(self.ID+sasbdb+"Kratky.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Dimensionless Kratky plot ("+sasbdb+")")
@@ -124,6 +133,9 @@ def plot_kratky(self,sasbdb,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"Kratky.svg")
 
     def plot_porod_debye(self,sasbdb,df):
+        '''
+        porod debye plot for flexibility 
+        '''
         output_file(self.ID+sasbdb+"porod.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Porod-Debye plot ("+sasbdb+")")
@@ -144,6 +156,9 @@ def plot_porod_debye(self,sasbdb,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"porod.svg")
 
     def plot_pddf(self,sasbdb,df):
+        '''
+        p(r) plot, deprecated function 
+        '''
         output_file(self.ID+sasbdb+"pddf.html",mode="inline") 
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Pair distance distribution function ("+sasbdb+")")
@@ -165,6 +180,9 @@ def plot_pddf(self,sasbdb,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"pddf.svg")
 
     def plot_pddf_residuals(self,sasbdb,df):
+        '''
+        p(r) residuals 
+        '''
         output_file(self.ID+sasbdb+"pddf_residuals.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Residuals for P(r) fit ("+sasbdb+")")
@@ -187,6 +205,9 @@ def plot_pddf_residuals(self,sasbdb,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"pddf_residuals.svg")
 
     def plot_pddf_residuals_wt(self,sasbdb,df):
+        '''
+        p(r) error weighted residuals
+        '''
         output_file(self.ID+sasbdb+"pddf_residuals_wt.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Error weighted residuals for P(r) fit ("+sasbdb+")")
@@ -209,6 +230,9 @@ def plot_pddf_residuals_wt(self,sasbdb,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"pddf_residuals_wt.svg")
 
     def plot_pddf_int(self,sasbdb,df_int,df_pofr):
+        '''
+        p(r) with fit
+        '''
         output_file(self.ID+sasbdb+"pddf_int.html",mode="inline")
         source1 = ColumnDataSource(df_int)
         source2=ColumnDataSource(df_pofr)
@@ -235,6 +259,9 @@ def plot_pddf_int(self,sasbdb,df_int,df_pofr):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"pddf_int.svg")
 
     def Guinier_plot_fit(self,sasbdb,df,score):
+        '''
+        Guinier plot with fit
+        '''
         output_file(self.ID+sasbdb+"guinier.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Guinier plot for "+sasbdb+" (R\u00B2="+str(score)+")")
@@ -259,6 +286,9 @@ def Guinier_plot_fit(self,sasbdb,df,score):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"guinier.svg")
 
     def Guinier_plot_residuals(self,sasbdb,df):
+        '''
+        Guinier plot residuals
+        '''
         output_file(self.ID+sasbdb+"guinier_residuals.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Residuals for Guinier plot fit ("+sasbdb+")")
@@ -281,6 +311,9 @@ def Guinier_plot_residuals(self,sasbdb,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+"guinier_residuals.svg")
 
     def plot_fit(self,sasbdb,fit,score,df):
+        '''
+        plot chi-squared fit
+        '''
         output_file(self.ID+sasbdb+str(fit)+"fit1.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Model fit for "+sasbdb)        
@@ -306,6 +339,9 @@ def plot_fit(self,sasbdb,fit,score,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+str(fit)+"fit1.svg")
 
     def plot_fit_residuals(self,sasbdb,fit,df):
+        '''
+        plot residuals for each fit
+        '''
         output_file(self.ID+sasbdb+str(fit)+"residuals.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Residuals for model fit ("+sasbdb+")")
@@ -328,6 +364,9 @@ def plot_fit_residuals(self,sasbdb,fit,df):
         export_svgs(p,filename=self.filename_add+'/'+self.ID+sasbdb+str(fit)+"residuals.svg")
 
     def plot_fit_residuals_wt(self,sasbdb,fit,df):
+        '''
+        plot error weighted residuals for each fit
+        '''
         output_file(self.ID+sasbdb+str(fit)+"residuals_wt.html",mode="inline")
         source = ColumnDataSource(df)
         p = figure(plot_height=500, plot_width=500, title="Error-weighted residuals for model fit ("+sasbdb+")")