forked from MobleyLab/alchemical-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalchemical_analysis.py
1127 lines (978 loc) · 53.6 KB
/
alchemical_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/env python
# Originally written by Michael Shirts as:
# Example illustrating the use of MBAR for computing the hydration free energy of OPLS 3-methylindole
# in TIP3P water through alchemical free energy simulations.
# Adapted by P. Klimovich and D. Mobley, March 2011, to be slightly more general.
# Additionally adapted by Michael Shirts and P. Klimovich, May 2013, Dec 2014.
#===================================================================================================
# IMPORTS
#===================================================================================================
## Not a built-in module. Will be called from main, whenever needed. ##
## import pymbar Multistate Bennett Acceptance Ratio estimator. ##
import numpy
import pickle # for full-precision data storage
from optparse import OptionParser # for parsing command-line options
import os # for os interface
import time as ttt_time # for timing
import pdb # for debugging
#===================================================================================================
# INPUT OPTIONS
#===================================================================================================
# Command-line interface. Every analysis setting is exposed as an optparse option;
# the option 'dest' names below are the attribute names the rest of the script reads
# from the parsed-options object.
parser = OptionParser()
# Where the data come from and where the results go.
parser.add_option('-a', '--software', dest = 'software', help = 'Package\'s name the data files come from: Gromacs, Sire, or AMBER. Default: Gromacs.', default = 'Gromacs')
parser.add_option('-c', '--cfm', dest = 'bCFM', help = 'The Curve-Fitting-Method-based consistency inspector. Default: False.', default = False, action = 'store_true')
parser.add_option('-d', '--dir', dest = 'datafile_directory', help = 'Directory in which data files are stored. Default: Current directory.', default = '.')
# Help text fixed: the integer argument follows the flag.
parser.add_option('-f', '--forwrev', dest = 'bForwrev', help = 'Plotting the free energy change as a function of time in both directions. The number of time points (an integer) is to follow the flag. Default: 0', default = 0, type=int)
parser.add_option('-g', '--breakdown', dest = 'breakdown', help = 'Plotting the free energy differences evaluated for each pair of adjacent states for all methods. Default: False.', default = False, action = 'store_true')
parser.add_option('-i', '--threshold', dest = 'uncorr_threshold', help = 'Proceed with correlated samples if the number of uncorrelated samples is found to be less than this number. If 0 is given, the time series analysis will not be performed at all. Default: 50.', default = 50, type=int)
parser.add_option('-k', '--koff', dest = 'bSkipLambdaIndex', help = 'Give a string of lambda indices separated by \'-\' and they will be removed from the analysis. (Another approach is to have only the files of interest present in the directory). Default: None.', default = '')
# Help text fixed: 'esitimate' -> 'estimate'.
parser.add_option('-m', '--methods', dest = 'methods', help = 'A list of the methods to estimate the free energy with. Default: [TI, TI-CUBIC, DEXP, IEXP, BAR, MBAR]. To add/remove methods to the above list provide a string formed of the method strings preceded with +/-. For example, \'-ti_cubic+gdel\' will turn methods into [TI, DEXP, IEXP, BAR, MBAR, GDEL]. \'ti_cubic+gdel\', on the other hand, will call [TI-CUBIC, GDEL]. \'all\' calls the full list of supported methods [TI, TI-CUBIC, DEXP, IEXP, GINS, GDEL, BAR, UBAR, RBAR, MBAR].', default = '')
parser.add_option('-o', '--out', dest = 'output_directory', help = 'Directory in which the output files produced by this script will be stored. Default: Same as datafile_directory.', default = '')
parser.add_option('-p', '--prefix', dest = 'prefix', help = 'Prefix for datafile sets, i.e.\'dhdl\' (default).', default = 'dhdl')
parser.add_option('-q', '--suffix', dest = 'suffix', help = 'Suffix for datafile sets, i.e. \'xvg\' (default).', default = 'xvg')
# Reporting and physical parameters.
parser.add_option('-r', '--decimal', dest = 'decimal', help = 'The number of decimal places the free energies are to be reported with. No worries, this is for the text output only; the full-precision data will be stored in \'results.pickle\'. Default: 3.', default = 3, type=int)
parser.add_option('-s', '--skiptime', dest = 'equiltime', help = 'Discard data prior to this specified time as \'equilibration\' data. Units picoseconds. Default: 0 ps.', default = 0, type=float)
parser.add_option('-t', '--temperature', dest = 'temperature', help = "Temperature in K. Default: 298 K.", default = 298, type=float)
parser.add_option('-u', '--units', dest = 'units', help = 'Units to report energies: \'kJ\', \'kcal\', and \'kBT\'. Default: \'kJ\'', default = 'kJ')
parser.add_option('-v', '--verbose', dest = 'verbose', help = 'Verbose option. Default: False.', default = False, action = 'store_true')
parser.add_option('-w', '--overlap', dest = 'overlap', help = 'Print out and plot the overlap matrix. Default: False.', default = False, action = 'store_true')
parser.add_option('-x', '--ignoreWL', dest = 'bIgnoreWL', help = 'Do not check whether the WL weights are equilibrated. No log file needed as an accompanying input.', default = False, action = 'store_true')
# Numerical settings for the BAR/MBAR solvers.
parser.add_option('-y', '--tolerance', dest = 'relative_tolerance', help = "Convergence criterion for the energy estimates with BAR and MBAR. Default: 1e-10.", default = 1e-10, type=float)
parser.add_option('-z', '--initialize', dest = 'init_with', help = 'The initial MBAR free energy guess; either \'BAR\' or \'zeroes\'. Default: \'BAR\'.', default = 'BAR')
#===================================================================================================
# FUNCTIONS: Miscellanea.
#===================================================================================================
def getMethods(string):
    """Returns a list of the methods the free energy is to be estimated with.

    'string' is the method specification; it is matched against upper-case method names.
      ''          -- the default list (only TI and TI-CUBIC when P.software is Sire or Amber);
      'ALL'       -- every supported method;
      'A+B+C'     -- exactly the listed methods; names that are not supported are dropped;
      '+A-B...'   -- the default list with methods appended (+) or removed (-).
    An underscore stands for the hyphen of a method name ('TI_CUBIC' means 'TI-CUBIC'),
    because '-' itself is the removal operator. Malformed strings are reported via parser.error().
    """
    supported = ['TI','TI-CUBIC','DEXP','IEXP','GINS','GDEL','BAR','UBAR','RBAR','MBAR']
    if P.software.title() in ('Sire', 'Amber'):
        selected = ['TI','TI-CUBIC']
    else:
        selected = ['TI','TI-CUBIC','DEXP','IEXP','BAR','MBAR']

    if not string:
        return selected
    if string == 'ALL':
        return supported

    leading = string[0]
    if leading.isalpha():
        # An explicit list: keep only the names we know about.
        wanted = string.replace('+', ' ').replace('_', '-').split()
        return [name for name in wanted if name in supported]
    if leading != '+' and leading != '-':
        parser.error("\nUnknown character '%s' in the method string is found." % leading)
        return selected

    # Incremental form: walk the string, applying the pending operation each time
    # the next operator is met. A trailing '+' flushes the last method name.
    sign, name = leading, ''
    for char in string[1:] + '+':
        if char.isalnum():
            name += char
        elif char == '_':
            name += '-'
        elif char == '-' or char == '+':
            if name not in supported:
                parser.error("\nThere is no '%s' in the list of supported methods." % name)
                continue
            if sign == '-':
                if name in selected:
                    selected.remove(name)
            elif name not in selected:
                selected.append(name)
            sign, name = char, ''
        else:
            parser.error("\nUnknown character '%s' in the method string is found." % char)
    return selected
def checkUnitsAndMore(units):
    """Validates the requested energy units and a couple of option combinations.

    Returns (units, beta, beta_report): the unit label to print, 1/(kB*T) computed from
    P.temperature with kB in kJ/mol/K, and the factor the reduced energies are divided by
    when reported. For Sire and Amber an extra factor of 4.184 is applied in the opposite
    sense to that used for other packages.

    Side effects: P.output_directory falls back to P.datafile_directory when empty;
    parser.error() is called for an unknown unit or when the overlap matrix is requested
    without MBAR among P.methods.
    """
    kB = 1.3806488*6.02214129/1000.0 # Boltzmann's constant (kJ/mol/K).
    beta = 1./(kB*P.temperature)
    b_kcal = (numpy.array(['Sire', 'Amber']) == P.software.title()).any()

    # unit keyword -> (reporting factor, label)
    choices = {
        'kJ':   (beta/4.184**b_kcal, '(kJ/mol)'),
        'kcal': (4.184**(not b_kcal)*beta, '(kcal/mol)'),
        'kBT':  (1, '(k_BT)'),
    }
    if units not in choices:
        parser.error('\nI don\'t understand the unit type \'%s\': the only options \'kJ\', \'kcal\', and \'kBT\'' % units)
    beta_report, units = choices[units]

    if not P.output_directory:
        P.output_directory = P.datafile_directory
    if P.overlap and 'MBAR' not in P.methods:
        parser.error("\nMBAR is not in 'methods'; can't plot the overlap matrix.")
    return units, beta, beta_report
def timeStatistics(stime):
    """Returns the wall-clock time elapsed since 'stime' together with the current date.

    'stime' is a timestamp as returned by time.time().
    Returns (hours, minutes, seconds, date): whole hours, whole minutes within the hour,
    the remaining seconds as a string with two decimals, and time.asctime().
    """
    elapsed = ttt_time.time() - stime
    th = int(elapsed/3600.)
    # Minutes left over once the whole hours are taken out; the previous version kept
    # the total minute count and subtracted the hours twice when computing the seconds.
    tm = int((elapsed - 3600*th)/60.)
    ts = '%.2f' % (elapsed - 3600*th - 60*tm)
    return th, tm, ts, ttt_time.asctime()
#===================================================================================================
# FUNCTIONS: The autocorrelation analysis.
#===================================================================================================
def uncorrelate(sta, fin, do_dhdl=False):
    """Identifies uncorrelated samples and builds arrays of the reduced potential energy and dhdl
    that retain only the entries of these samples.

    'sta' and 'fin' are the starting and final snapshot positions to be read, both are arrays of dimension K.
    With 'do_dhdl' set, the subsampled dhdl array is built and returned as well, and a summary
    table is printed.

    Returns (dhdl, N_k, u_kln) if 'do_dhdl' is set, otherwise (N_k, u_kln).
    When P.uncorr_threshold is 0 the analysis is skipped and the module-level
    (dhdlt, nsnapshots, u_klt) are returned as they are (None replaces u_klt for Sire).
    """
    if not P.uncorr_threshold:
        energies = None if P.software.title()=='Sire' else u_klt
        return dhdlt, nsnapshots, energies

    import pymbar ## this is not a built-in module ##

    longest = max(fin-sta) # the third dimension has to accommodate the longest window
    u_kln = numpy.zeros([K,K,longest], numpy.float64) # u_kln[k,m,n] is the reduced potential energy of uncorrelated sample index n from state k evaluated at state m
    N_k = numpy.zeros(K, int) # N_k[k] is the number of uncorrelated samples from state k
    g = numpy.zeros(K,float) # autocorrelation times for the data
    if do_dhdl:
        dhdl = numpy.zeros([K,n_components,longest], float) # dhdl is value for dhdl for each component in the file at each time.
        print("\n\nNumber of correlated and uncorrelated samples:\n\n%6s %12s %12s %12s\n" % ('State', 'N', 'N_k', 'N/N_k'))

    for k in range(K):
        # Total dhdl (summed over the energy components) within the requested window
        # is the time series the autocorrelation analysis is run on.
        dhdl_sum = numpy.sum(dhdlt[k,:,sta[k]:fin[k]], axis=0)
        g[k] = pymbar.timeseries.statisticalInefficiency(dhdl_sum)
        subsampled = pymbar.timeseries.subsampleCorrelatedData(dhdl_sum, g=g[k])
        indices = sta[k] + numpy.array(subsampled) # positions of the uncorrelated samples
        N = len(indices)

        # Too few uncorrelated samples: fall back to every sample of the window.
        if N < P.uncorr_threshold:
            if do_dhdl:
                print("WARNING: Only %s uncorrelated samples found at lambda number %s; proceeding with analysis using correlated samples..." % (N, k))
            indices = sta[k] + numpy.arange(len(dhdl_sum))
            N = len(indices)
        N_k[k] = N

        if u_klt is not None:
            u_kln[k,:K,0:N] = u_klt[k,:K][:,indices]
        if do_dhdl:
            print("%6s %12s %12s %12.2f" % (k, fin[k], N_k[k], g[k]))
            dhdl[k,:n_components,0:N] = dhdlt[k,:n_components][:,indices]

    if do_dhdl:
        return (dhdl, N_k, u_kln)
    return (N_k, u_kln)
#===================================================================================================
# FUNCTIONS: The MBAR workhorse.
#===================================================================================================
def estimatewithMBAR(u_kln, N_k, reltol, regular_estimate=False):
    """Computes the MBAR free energy given the reduced potential and the number of relevant entries in it.

    u_kln and N_k are handed to pymbar.mbar.MBAR unchanged; reltol is passed as its
    'relative_tolerance'. The module-level names K, P, pymbar (and pl, os, numpy for
    the overlap plot) must already be defined when this is called.

    Returns:
      regular_estimate=True  -- the matrices (Deltaf_ij, dDeltaf_ij); if P.overlap is set the
                                overlap matrix is also printed and plotted to 'O_MBAR.pdf'.
      regular_estimate=False -- the scalars (Deltaf_ij[0,K-1], dDeltaf_ij[0,K-1]),
                                each divided by P.beta_report.
    """
    def plotOverlapMatrix(O):
        """Plots the probability of observing a sample from state i (row) in state j (column).
        For convenience, the neighboring state cells are fringed in bold."""
        max_prob = O.max()
        fig = pl.figure(figsize=(K/2.,K/2.))
        fig.add_subplot(111, frameon=False, xticks=[], yticks=[])
        for i in range(K):
            # Thin grid lines between the cells (none at the outer edge i == 0).
            if i!=0:
                pl.axvline(x=i, ls='-', lw=0.5, color='k', alpha=0.25)
                pl.axhline(y=i, ls='-', lw=0.5, color='k', alpha=0.25)
            for j in range(K):
                # Entries below 0.005 get no label; otherwise print two decimals
                # with the leading character dropped.
                if O[j,i] < 0.005:
                    ii = ''
                else:
                    ii = ("%.2f" % O[j,i])[1:]
                # Shade the cell proportionally to the largest entry; row j is drawn at height K-j.
                alf = O[j,i]/max_prob
                pl.fill_between([i,i+1], [K-j,K-j], [K-(j+1),K-(j+1)], color='k', alpha=alf)
                pl.annotate(ii, xy=(i,j), xytext=(i+0.5,K-(j+0.5)), size=8, textcoords='data', va='center', ha='center', color=('k' if alf < 0.5 else 'w'))
        # Axis labels are the original lambda indices: when some indices were skipped
        # via P.bSkipLambdaIndex, they are removed from the full index range.
        if P.bSkipLambdaIndex:
            ks = [int(l) for l in P.bSkipLambdaIndex.split('-')]
            ks = numpy.delete(numpy.arange(K+len(ks)), ks)
        else:
            ks = range(K)
        # NOTE(review): 'j' in the xy arguments below is the value left over from the
        # loop above (K-1); the text position is set by xytext.
        for i in range(K):
            pl.annotate(ks[i], xy=(i+0.5, 1), xytext=(i+0.5, K+0.5), size=10, textcoords=('data', 'data'), va='center', ha='center', color='k')
            pl.annotate(ks[i], xy=(-0.5, K-(j+0.5)), xytext=(-0.5, K-(i+0.5)), size=10, textcoords=('data', 'data'), va='center', ha='center', color='k')
        pl.annotate('$\lambda$', xy=(-0.5, K-(j+0.5)), xytext=(-0.5, K+0.5), size=10, textcoords=('data', 'data'), va='center', ha='center', color='k')
        # Outer frame of the matrix.
        pl.plot([0,K], [0,0], 'k-', lw=4.0, solid_capstyle='butt')
        pl.plot([K,K], [0,K], 'k-', lw=4.0, solid_capstyle='butt')
        pl.plot([0,0], [0,K], 'k-', lw=2.0, solid_capstyle='butt')
        pl.plot([0,K], [K,K], 'k-', lw=2.0, solid_capstyle='butt')
        # Staircase lines built from doubled, sorted index lists; these draw the bold
        # fringe around the cells next to the diagonal.
        cx = sorted(2*range(K+1))
        cy = sorted(2*range(K+1), reverse=True)
        pl.plot(cx[2:-1], cy[1:-2], 'k-', lw=2.0)
        pl.plot(numpy.array(cx[2:-3])+1, cy[1:-4], 'k-', lw=2.0)
        pl.plot(cx[1:-2], numpy.array(cy[:-3])-1, 'k-', lw=2.0)
        pl.plot(cx[1:-4], numpy.array(cy[:-5])-2, 'k-', lw=2.0)
        pl.xlim(-1, K)
        pl.ylim(0, K+1)
        pl.savefig(os.path.join(P.output_directory, 'O_MBAR.pdf'), bbox_inches='tight', pad_inches=0.0)
        pl.close(fig)
        return
    if regular_estimate:
        print "\nEstimating the free energy change with MBAR..."
    MBAR = pymbar.mbar.MBAR(u_kln, N_k, verbose = P.verbose, method = 'adaptive', relative_tolerance = reltol, initialize = P.init_with)
    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    (Deltaf_ij, dDeltaf_ij, theta_ij) = MBAR.getFreeEnergyDifferences(uncertainty_method='svd-ew')
    if P.verbose:
        print "Matrix of free energy differences\nDeltaf_ij:\n%s\ndDeltaf_ij:\n%s" % (Deltaf_ij, dDeltaf_ij)
    if regular_estimate:
        if P.overlap:
            print "The overlap matrix is..."
            # The third item returned by computeOverlap() is used as the overlap matrix.
            O = MBAR.computeOverlap()[2]
            for k in range(K):
                line = ''
                for l in range(K):
                    line += ' %5.2f ' % O[k, l]
                print line
            plotOverlapMatrix(O)
            print "\nFor a nicer figure look at 'O_MBAR.pdf'"
        return (Deltaf_ij, dDeltaf_ij)
    # Not a regular estimate: only the end-to-end difference, converted for reporting.
    return (Deltaf_ij[0,K-1]/P.beta_report, dDeltaf_ij[0,K-1]/P.beta_report)
#===================================================================================================
# FUNCTIONS: Thermodynamic integration.
#===================================================================================================
class naturalcubicspline:
    """Natural cubic spline on a fixed set of knots, stored as weight matrices.

    On segment i (between x[i] and x[i+1]) the spline is
        a[i] + b[i]*t + c[i]*t**2 + d[i]*t**3,   t = xnew - x[i],
    where the coefficient vectors are obtained by multiplying the matrices
    AW, BW, CW and DW by the vector of values at the knots. Because the matrices
    depend on the knots only, one instance can be reused for any set of values.

    Attributes:
      x                -- copy of the knots
      AW, BW, CW, DW   -- coefficient weight matrices (L x L)
      wk               -- wk[k] are the weights giving the integral over segment k
      wsum             -- sum of wk over the segments: weights for the integral over all knots
    """

    def __init__(self, x):
        """Precomputes the coefficient and integration weights for the knots 'x' (a numpy array)."""
        # define some space
        L = len(x)
        H = numpy.zeros([L,L],float)
        M = numpy.zeros([L,L],float)
        BW = numpy.zeros([L,L],float)
        AW = numpy.zeros([L,L],float)
        DW = numpy.zeros([L,L],float)
        h = x[1:L]-x[0:L-1]
        ih = 1.0/h
        # define the H and M matrix, from p. 371 "applied numerical methods with matlab, Chapra"
        # The first and the last rows pin the end c coefficients to zero (M has zero rows there).
        H[0,0] = 1
        H[L-1,L-1] = 1
        for i in range(1,L-1):
            H[i,i] = 2*(h[i-1]+h[i])
            H[i,i-1] = h[i-1]
            H[i,i+1] = h[i]
            M[i,i] = -3*(ih[i-1]+ih[i])
            M[i,i-1] = 3*(ih[i-1])
            M[i,i+1] = 3*(ih[i])
        CW = numpy.dot(numpy.linalg.inv(H),M) # this is the matrix translating c to weights in f.
        # each row corresponds to the weights for each c.
        # from CW, define the other coefficient matrices
        for i in range(0,L-1):
            BW[i,:] = -(h[i]/3)*(2*CW[i,:]+CW[i+1,:])
            BW[i,i] += -ih[i]
            BW[i,i+1] += ih[i]
            DW[i,:] = (ih[i]/3)*(CW[i+1,:]-CW[i,:])
            AW[i,i] = 1
        # Make copies of the arrays we'll be using in the future.
        self.x = x.copy()
        self.AW = AW.copy()
        self.BW = BW.copy()
        self.CW = CW.copy()
        self.DW = DW.copy()
        # find the integrating weights
        self.wsum = numpy.zeros([L],float)
        self.wk = numpy.zeros([L-1,L],float)
        for k in range(0,L-1):
            # Term-by-term integral of the cubic over segment k.
            w = DW[k,:]*(h[k]**4)/4.0 + CW[k,:]*(h[k]**3)/3.0 + BW[k,:]*(h[k]**2)/2.0 + AW[k,:]*(h[k])
            self.wk[k,:] = w
            self.wsum += w

    def interpolate(self,y,xnew):
        """Evaluates the spline through the values 'y' (one per knot) at the points 'xnew'.

        Points outside the knot range are evaluated with the cubic of the nearest
        (first or last) segment. A first/last query point lying at or beyond the
        corresponding terminal knot is given exactly y[0]/y[-1].
        Returns a float array of len(xnew); an empty 'xnew' gives an empty array.
        Calls parser.error() if 'y' does not have one value per knot.
        """
        if len(self.x) != len(y):
            parser.error("\nThe length of 'y' should be consistent with that of 'self.x'. I cannot perform linear algebra operations.")
        # get the array of actual coefficients by multiplying the coefficient matrix by the values
        a = numpy.dot(self.AW,y)
        b = numpy.dot(self.BW,y)
        c = numpy.dot(self.CW,y)
        d = numpy.dot(self.DW,y)
        N = len(xnew)
        ynew = numpy.zeros([N],float)
        if N == 0:
            return ynew
        last_segment = len(self.x) - 2
        for i in range(N):
            # Find the index of 'xnew[i]' it would have in 'self.x'.
            j = numpy.searchsorted(self.x, xnew[i]) - 1
            # Keep the index on an actual segment: without this a point at (or below)
            # the first knot lands on row -1 and a point beyond the last knot on a
            # row for which no segment coefficients exist.
            j = min(max(j, 0), last_segment)
            lamw = xnew[i] - self.x[j]
            ynew[i] = d[j]*lamw**3 + c[j]*lamw**2 + b[j]*lamw + a[j]
        # Preserve the terminal points, but only if the queries actually reach them.
        if xnew[0] <= self.x[0]:
            ynew[0] = y[0]
        if xnew[-1] >= self.x[-1]:
            ynew[-1] = y[-1]
        return ynew
def TIprelim(lv):
    """Prepares the quantities the thermodynamic integration is based on.

    'lv' is the array of lambda vectors (one row per state, one column per component).
    Returns (lchange, dlam, ave_dhdl, std_dhdl):
      lchange  -- boolean K x n_components array, True where the component's lambda is
                  increasing towards or away from that state;
      dlam     -- differences between consecutive lambda vectors;
      ave_dhdl -- P.beta times the mean of dhdl for each state and component;
      std_dhdl -- P.beta times the standard deviation of dhdl divided by sqrt(N_k-1).
    If module-level 'ave_dhdl' and 'std_dhdl' already exist they are returned as they
    are instead of being recomputed from the module-level dhdl and N_k.
    """
    # Lambda vectors spacing.
    dlam = numpy.diff(lv, axis=0)

    # A state takes part in the interpolation of a component if lambda grows on either
    # side of it; ranges over which lambda does not change are left out.
    lchange = numpy.zeros([K,n_components],bool) # booleans for which lambdas are changing
    rising = dlam[:K-1,:n_components] > 0
    lchange[:K-1] |= rising
    lchange[1:] |= rising

    known = globals()
    if 'ave_dhdl' in known and 'std_dhdl' in known:
        return lchange, dlam, known['ave_dhdl'], known['std_dhdl']

    # Compute <dhdl> and std(dhdl) for each component, for each lambda; multiply them by beta to make unitless.
    ave_dhdl = numpy.zeros([K,n_components],float)
    std_dhdl = numpy.zeros([K,n_components],float)
    for k in range(K):
        samples = dhdl[k,:,0:N_k[k]] # only the first N_k[k] entries are relevant
        ave_dhdl[k,:] = P.beta*numpy.average(samples,axis=1)
        std_dhdl[k,:] = P.beta*numpy.std(samples,axis=1)/numpy.sqrt(N_k[k]-1)
    return lchange, dlam, ave_dhdl, std_dhdl
def getSplines(lchange):
    """Builds one natural cubic spline per lambda component.

    'lchange' is the boolean K x n_components array marking the states at which a
    component's lambda is changing.
    Returns (cubspl, mapl):
      cubspl -- per component, a naturalcubicspline over the lambdas of the marked states,
                or 0 if no state is marked for that component;
      mapl   -- mapl[k,j] is the position of state k among the marked states of
                component j (0 for unmarked states).
    """
    mapl = numpy.zeros([K,n_components],int) # map back to the original k from the components
    cubspl = []
    for j in range(n_components):
        # Number the marked states of this component consecutively.
        marked = [k for k in range(K) if lchange[k,j]]
        for position, k in enumerate(marked):
            mapl[k,j] += position

        # The spline weights are defined by the lambdas of the marked states only.
        knots = lv[lchange[:,j],j]
        if len(knots) == 0: # handle the all-zero lv column
            cubspl.append(0)
        else:
            cubspl.append(naturalcubicspline(knots))
    return cubspl, mapl
#===================================================================================================
# FUNCTIONS: This one estimates dF and ddF for all pairs of adjacent states and stores them.
#===================================================================================================
def estimatePairs():
    """Estimates dF and ddF for all pairs of adjacent states with every method in P.methods.

    Takes no arguments; reads the module-level K, P, n_components, dlam, ave_dhdl, std_dhdl,
    lchange, cubspl, mapl, u_kln, N_k, Deltaf_ij, dDeltaf_ij and pymbar.

    Returns (df_allk, ddf_allk): sequences built with numpy.append holding, for each pair
    (k, k+1), a dict mapping the method name to the estimate and to its uncertainty.
    """
    # MBAR is left out of the announcement; its values are only copied in below.
    print ("Estimating the free energy change with %s..." % ', '.join(P.methods)).replace(', MBAR', '')
    df_allk = list(); ddf_allk = list()
    for k in range(K-1):
        df = dict(); ddf = dict()
        for name in P.methods:
            if name == 'TI':
                #===================================================================================================
                # Estimate free energy difference with TI; interpolating with the trapezoidal rule.
                #===================================================================================================
                df['TI'] = 0.5*numpy.dot(dlam[k],(ave_dhdl[k]+ave_dhdl[k+1]))
                ddf['TI'] = 0.5*numpy.sqrt(numpy.dot(dlam[k]**2,std_dhdl[k]**2+std_dhdl[k+1]**2))
            if name == 'TI-CUBIC':
                #===================================================================================================
                # Estimate free energy difference with TI; interpolating with the natural cubic splines.
                #===================================================================================================
                df['TI-CUBIC'], ddf['TI-CUBIC'] = 0, 0
                for j in range(n_components):
                    # Only components whose lambda increases over this pair contribute.
                    if dlam[k,j] > 0:
                        lj = lchange[:,j]
                        # mapl[k,j] selects the spline segment that corresponds to the pair (k, k+1).
                        df['TI-CUBIC'] += numpy.dot(cubspl[j].wk[mapl[k,j]],ave_dhdl[lj,j])
                        ddf['TI-CUBIC'] += numpy.dot(cubspl[j].wk[mapl[k,j]]**2,std_dhdl[lj,j]**2)
                ddf['TI-CUBIC'] = numpy.sqrt(ddf['TI-CUBIC'])
            # Energy differences of the samples from state k evaluated at k+1 and at k.
            if any(name == m for m in ['DEXP', 'GDEL', 'BAR', 'UBAR', 'RBAR']):
                w_F = u_kln[k,k+1,0:N_k[k]] - u_kln[k,k,0:N_k[k]]
            if name == 'DEXP':
                #===================================================================================================
                # Estimate free energy difference with Forward-direction EXP (in this case, Deletion from solvent).
                #===================================================================================================
                (df['DEXP'], ddf['DEXP']) = pymbar.exp.EXP(w_F)
            if name == 'GDEL':
                #===================================================================================================
                # Estimate free energy difference with a Gaussian estimate of EXP (in this case, deletion from solvent)
                #===================================================================================================
                (df['GDEL'], ddf['GDEL']) = pymbar.exp.EXPGauss(w_F)
            # Energy differences of the samples from state k+1 evaluated at k and at k+1.
            if any(name == m for m in ['IEXP', 'GINS', 'BAR', 'UBAR', 'RBAR']):
                w_R = u_kln[k+1,k,0:N_k[k+1]] - u_kln[k+1,k+1,0:N_k[k+1]]
            if name == 'IEXP':
                #===================================================================================================
                # Estimate free energy difference with Reverse-direction EXP (in this case, insertion into solvent).
                #===================================================================================================
                (rdf,rddf) = pymbar.exp.EXP(w_R)
                # The reverse estimate is sign-flipped to refer to the k -> k+1 direction.
                (df['IEXP'], ddf['IEXP']) = (-rdf,rddf)
            if name == 'GINS':
                #===================================================================================================
                # Estimate free energy difference with a Gaussian estimate of EXP (in this case, insertion into solvent)
                #===================================================================================================
                (rdf,rddf) = pymbar.exp.EXPGauss(w_R)
                (df['GINS'], ddf['GINS']) = (-rdf,rddf)
            if name == 'BAR':
                #===================================================================================================
                # Estimate free energy difference with BAR; use w_F and w_R computed above.
                #===================================================================================================
                (df['BAR'], ddf['BAR']) = pymbar.bar.BAR(w_F, w_R, relative_tolerance=P.relative_tolerance, verbose = P.verbose)
            if name == 'UBAR':
                #===================================================================================================
                # Estimate free energy difference with unoptimized BAR -- assume dF is zero, and just do one evaluation
                #===================================================================================================
                (df['UBAR'], ddf['UBAR']) = pymbar.bar.BAR(w_F, w_R, verbose = P.verbose,iterated_solution=False)
            if name == 'RBAR':
                #===================================================================================================
                # Estimate free energy difference with Unoptimized BAR over range of free energy values, and choose the one
                # that is self consistently best.
                #===================================================================================================
                min_diff = 1E6
                best_udf = 0
                # Trial values -10..9; keep the estimate closest to its own trial value.
                # NOTE(review): best_uddf is bound only if some trial gives diff < 1E6.
                for trial_udf in range(-10,10,1):
                    (udf, uddf) = pymbar.bar.BAR(w_F, w_R, DeltaF=trial_udf, iterated_solution=False, verbose=P.verbose)
                    diff = numpy.abs(udf - trial_udf)
                    if (diff < min_diff):
                        best_udf = udf
                        best_uddf = uddf
                        min_diff = diff
                (df['RBAR'], ddf['RBAR']) = (best_udf,best_uddf)
            if name == 'MBAR':
                #===================================================================================================
                # Store the MBAR free energy difference (already estimated above) properly, i.e. by state.
                #===================================================================================================
                (df['MBAR'], ddf['MBAR']) = Deltaf_ij[k,k+1], dDeltaf_ij[k,k+1]
        df_allk = numpy.append(df_allk,df)
        ddf_allk = numpy.append(ddf_allk,ddf)
    return df_allk, ddf_allk
#===================================================================================================
# FUNCTIONS: All done with calculations; summarize and print stats.
#===================================================================================================
def totalEnergies():
    """Sums the pairwise estimates into 'Coulomb', 'vdWaals' and 'TOTAL' segments, prints the
    results table and stores it.

    Takes no arguments; reads the module-level K, P, lv, n_components, lchange, cubspl, dlam,
    std_dhdl, df_allk, ddf_allk, Deltaf_ij and dDeltaf_ij.

    Side effects: prints the table; writes 'results.txt' and 'results.pickle' (a pickle of P)
    into P.output_directory; sets P.datafile_directory (to the current working directory),
    P.when_analyzed, P.ddf_allk, P.df_allk, P.ddFs and P.dFs.
    """
    # Count up the charging states.
    numcharging = 0
    for lv_n in ['coul', 'fep']:
        if lv_n in P.lv_names:
            ndx_char = P.lv_names.index(lv_n)
            lv_char = lv[:, ndx_char]
            # Only a component that actually varies counts; states with lambda != 1 are the charging ones.
            if not (lv_char == lv_char[0]).all():
                numcharging = (lv_char != 1).sum()
                break
    if numcharging == K:
        numcharging = K-1
    # Split the total energies into segments; initialize lists to store them.
    segments = ['Coulomb' , 'vdWaals' , 'TOTAL']
    segmentstarts = [0 , numcharging, 0 ]
    segmentends = [numcharging, K-1 , K-1 ]
    dFs = []
    ddFs = []
    # Perform the energy segmentation; be pedantic about the TI cumulative ddF's (see Section 3.1 of the paper).
    for i in range(len(segments)):
        segment = segments[i]; segstart = segmentstarts[i]; segend = segmentends[i]
        dF = dict.fromkeys(P.methods, 0)
        ddF = dict.fromkeys(P.methods, 0)
        for name in P.methods:
            if name == 'MBAR':
                # MBAR gives the difference between any two states directly.
                dF['MBAR'] = Deltaf_ij[segstart, segend]
                ddF['MBAR'] = dDeltaf_ij[segstart, segend]
            elif name[0:2] == 'TI':
                for k in range(segstart, segend):
                    dF[name] += df_allk[k][name]
                # Components whose integration weights enter the uncertainty of this segment.
                # NOTE(review): ndx_char is bound only if 'coul' or 'fep' is in P.lv_names;
                # numcharging>0 implies that it is.
                if segment == 'Coulomb':
                    jlist = [ndx_char] if numcharging>0 else []
                elif segment == 'vdWaals':
                    jlist = []
                elif segment == 'TOTAL':
                    jlist = range(n_components)
                for j in jlist:
                    lj = lchange[:,j]
                    if not (lj == False).all(): # handle the all-zero lv column
                        if name == 'TI-CUBIC':
                            ddF[name] += numpy.dot((cubspl[j].wsum)**2,std_dhdl[lj,j]**2)
                        elif name == 'TI':
                            # Trapezoidal weights: half of the spacing on either side of each state.
                            h = numpy.trim_zeros(dlam[:,j])
                            wsum = 0.5*(numpy.append(h,0) + numpy.append(0,h))
                            ddF[name] += numpy.dot(wsum**2,std_dhdl[lj,j]**2)
                ddF[name] = numpy.sqrt(ddF[name])
            else:
                # All other methods: sum the pairwise dF's and add the pairwise ddF's in quadrature.
                for k in range(segstart,segend):
                    dF[name] += df_allk[k][name]
                    ddF[name] += (ddf_allk[k][name])**2
                ddF[name] = numpy.sqrt(ddF[name])
        dFs.append(dF)
        ddFs.append(ddF)
    for name in P.methods: # 'vdWaals' = 'TOTAL' - 'Coulomb'
        ddFs[1][name] = (ddFs[2][name]**2 - ddFs[0][name]**2)**0.5
    # Display results.
    def printLine(str1, str2, d1=None, d2=None):
        """Fills out the results table linewise."""
        # d1 selects the kind of line: 'plain' repeats str2 for every method, 'name' fills
        # str2 with the method name and the units, and a pair of dicts (d1, d2) fills it
        # with the values converted by P.beta_report. The same text is kept in outtext.
        print str1,
        text = str1
        for name in P.methods:
            if d1 == 'plain':
                print str2,
                text += ' ' + str2
            if d1 == 'name':
                print str2 % (name, P.units),
                text += ' ' + str2 % (name, P.units)
            if d1 and d2:
                print str2 % (d1[name]/P.beta_report, d2[name]/P.beta_report),
                text += ' ' + str2 % (d1[name]/P.beta_report, d2[name]/P.beta_report)
        print ''
        outtext.append(text + '\n')
        return
    # Column formats are derived from the requested number of decimals ('X' stands in for '%').
    d = P.decimal
    str_dash = (d+7 + 6 + d+2)*'-'
    str_dat = ('X%d.%df +- X%d.%df' % (d+7, d, d+2, d)).replace('X', '%')
    str_names = ('X%ds X-%ds' % (d+6, d+8)).replace('X', '%')
    outtext = []
    printLine(12*'-', str_dash, 'plain')
    printLine('%-12s' % ' States', str_names, 'name')
    printLine(12*'-', str_dash, 'plain')
    for k in range(K-1):
        printLine('%4d -- %-4d' % (k, k+1), str_dat, df_allk[k], ddf_allk[k])
    printLine(12*'-', str_dash, 'plain')
    remark = ["", "A remark on the energy components interpretation: ",
              " 'vdWaals' is computed as 'TOTAL' - 'Coulomb', where ",
              " 'Coulomb' is found as the free energy change between ",
              " the states defined by the lambda vectors (0,0,...,0) ",
              " and (1,0,...,0), the only varying vector component ",
              " being either 'coul-lambda' or 'fep-lambda'. "]
    # Full width of the table; str_align centers text within it ('I' is the fill character).
    w = 12 + (1+len(str_dash))*len(P.methods)
    str_align = '{:I^%d}' % w
    # The segment breakdown is shown only if there is more than one lambda component.
    if len(P.lv_names)>1:
        for i in range(len(segments)):
            printLine('%9s: ' % segments[i], str_dat, dFs[i], ddFs[i])
        for i in remark:
            print str_align.replace('I', ' ').format(i)
    else:
        printLine('%9s: ' % segments[-1], str_dat, dFs[-1], ddFs[-1])
    # Store results.
    outfile = open(os.path.join(P.output_directory, 'results.txt'), 'w')
    outfile.writelines(outtext)
    outfile.close()
    # The full-precision data travel with the options object.
    P.datafile_directory = os.getcwd()
    P.when_analyzed = ttt_time.asctime()
    P.ddf_allk = ddf_allk
    P.df_allk = df_allk
    P.ddFs = ddFs
    P.dFs = dFs
    outfile = open(os.path.join(P.output_directory, 'results.pickle'), 'w')
    pickle.dump(P, outfile)
    outfile.close()
    print '\n'+w*'*'
    for i in [" The above table has been stored in ", " "+P.output_directory+"/results.txt ",
              " while the full-precision data ", " (along with the simulation profile) in ", " "+P.output_directory+"/results.pickle "]:
        print str_align.format('{:^40}'.format(i))
    print w*'*'
    return
#===================================================================================================
# FUNCTIONS: Free energy change vs. simulation time. Called by the -f flag.
#===================================================================================================
def dF_t():
    """Estimate the MBAR free energy change as a function of the amount of data used.

    The snapshot sequence is cut at P.bForwrev equidistant time frames.  The
    'forward' series re-estimates the free energy from the data between the
    first frame and each later frame; the 'reverse' series from the data
    between each frame and the last one.  The two series are printed as a
    table, plotted to 'dF_t.pdf' and written to 'dF_t.txt' inside
    P.output_directory.

    Problems are reported through parser.error() when 'MBAR' is not among
    P.methods or when the entries of P.snap_size are not all equal.
    """

    def plotdFvsTime(f_ts, r_ts, F_df, R_df, F_ddf, R_ddf):
        """Plots the free energy change computed using the equilibrated snapshots between the proper target time frames (f_ts and r_ts)
        in both forward (data points are stored in F_df and F_ddf) and reverse (data points are stored in R_df and R_ddf) directions."""
        fig = pl.figure(figsize=(8,6))
        ax = fig.add_subplot(111)
        # Only the bottom and left spines stay visible.
        pl.setp(ax.spines['bottom'], color='#D2B9D3', lw=3, zorder=-2)
        pl.setp(ax.spines['left'], color='#D2B9D3', lw=3, zorder=-2)
        for side in ['top', 'right']:
            ax.spines[side].set_color('none')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')

        # Express every time stamp as a fraction of the largest forward time.
        max_fts = max(f_ts)
        rr_ts = [aa/max_fts for aa in f_ts[::-1]]
        f_ts = [aa/max_fts for aa in f_ts]
        r_ts = [aa/max_fts for aa in r_ts]

        # Horizontal band: the estimate stored in R_df[0] +/- its uncertainty.
        pl.fill_between([r_ts[0], f_ts[-1]], R_df[0]-R_ddf[0], R_df[0]+R_ddf[0], color='#D2B9D3', zorder=-5)
        # Forward series: one vertical error bar per point, then the connecting line.
        for i in range(len(f_ts)):
            line1 = pl.plot([f_ts[i]]*2, [F_df[i]-F_ddf[i], F_df[i]+F_ddf[i]], color='#736AFF', ls='-', lw=3, solid_capstyle='round', zorder=1)
        pl.plot(f_ts, F_df, color='#736AFF', ls='-', lw=3, marker='o', mfc='w', mew=2.5, mec='#736AFF', ms=12, zorder=2)
        # Reverse series, drawn the same way.
        for i in range(len(rr_ts)):
            line2 = pl.plot([rr_ts[i]]*2, [R_df[i]-R_ddf[i], R_df[i]+R_ddf[i]], color='#C11B17', ls='-', lw=3, solid_capstyle='round', zorder=3)
        pl.plot(rr_ts, R_df, color='#C11B17', ls='-', lw=3, marker='o', mfc='w', mew=2.5, mec='#C11B17', ms=12, zorder=4)

        pl.xlim(r_ts[0], f_ts[-1])
        pl.xticks(r_ts[::2] + f_ts[-1:], fontsize=10)
        pl.yticks(fontsize=10)
        # The legend handles are the last error bars drawn for each direction.
        pl.legend((line1[0], line2[0]), (r'$Forward$', r'$Reverse$'), loc=9, prop=FP(size=18), frameon=False)
        pl.xlabel(r'$\mathrm{Fraction\/of\/the\/simulation\/time}$', fontsize=16, color='#151B54')
        pl.ylabel(r'$\mathrm{\Delta G\/%s}$' % P.units, fontsize=16, color='#151B54')
        pl.xticks(f_ts, ['%.2f' % i for i in f_ts])
        pl.tick_params(axis='x', color='#D2B9D3')
        pl.tick_params(axis='y', color='#D2B9D3')
        pl.savefig(os.path.join(P.output_directory, 'dF_t.pdf'))
        pl.close(fig)
        return

    if 'MBAR' not in P.methods:
        parser.error("\nCurrent version of the dF(t) analysis works with MBAR only and the method is not found in the list.")
    if not (P.snap_size[0] == numpy.array(P.snap_size)).all(): # this could be circumvented
        parser.error("\nThe snapshot size isn't the same for all the files; cannot perform the dF(t) analysis.")

    # Define a list of bForwrev equidistant time frames at which the free energy is to be estimated;
    # count up the snapshots enclosed between consecutive time frames (row i+1 of nss_tf, one column per state).
    n_tf = P.bForwrev + 1
    nss_tf = numpy.zeros([n_tf, K], int)
    increment = 1./(n_tf-1)
    if P.bExpanded:
        from collections import Counter # for counting elements in an array
        tf = numpy.arange(0,1+increment,increment)*(numpy.sum(nsnapshots)-1)+1
        tf[0] = 0
        for i in range(n_tf-1):
            # tf holds floats; slice bounds must be integers, so truncate explicitly.
            nss = Counter(extract_states[int(tf[i]):int(tf[i+1])])
            nss_tf[i+1] = numpy.array([nss[j] for j in range(K)])
    else:
        tf = numpy.arange(0,1+increment,increment)*(max(nsnapshots)-1)+1
        tf[0] = 0
        for i in range(n_tf-1):
            # Snapshots available up to this frame minus those already assigned to earlier frames.
            nss_tf[i+1] = numpy.array([min(j, tf[i+1]) for j in nsnapshots]) - numpy.sum(nss_tf[:i+1],axis=0)

    # Define the real time scale (in ps) rather than a snapshot sequence.
    # Every frame different from the first one is shifted back by one snapshot, undoing the "+1" above.
    ts = ["%.1f" % ((i-(i!=tf[0]))*P.snap_size[0] + P.equiltime) for i in tf]

    # Initialize arrays to store data points to be plotted.
    F_df = numpy.zeros(n_tf-1, float)
    F_ddf = numpy.zeros(n_tf-1, float)
    R_df = numpy.zeros(n_tf-1, float)
    R_ddf = numpy.zeros(n_tf-1, float)
    # Store the MBAR energy that accounts for all the equilibrated snapshots (has already been computed in the previous section).
    F_df[-1], F_ddf[-1] = (Deltaf_ij[0,K-1]/P.beta_report, dDeltaf_ij[0,K-1]/P.beta_report)
    R_df[0], R_ddf[0] = (Deltaf_ij[0,K-1]/P.beta_report, dDeltaf_ij[0,K-1]/P.beta_report)

    # Do the forward analysis: a single MBAR evaluation per intermediate time frame.
    print("Forward dF(t) analysis...\nEstimating the free energy change using the data up to")
    for i in range(n_tf-2):
        print("%60s ps..." % ts[i+1])
        N_k, u_kln = uncorrelate(nss_tf[0], numpy.sum(nss_tf[:i+2],axis=0))
        F_df[i], F_ddf[i] = estimatewithMBAR(u_kln, N_k, P.relative_tolerance)

    # Do the reverse analysis.
    print("Reverse dF(t) analysis...\nUsing the data starting from")
    fin = numpy.sum(nss_tf[:],axis=0)
    for i in range(n_tf-2):
        print("%34s ps..." % ts[i+1])
        sta = numpy.sum(nss_tf[:i+2],axis=0)
        N_k, u_kln = uncorrelate(sta, fin)
        R_df[i+1], R_ddf[i+1] = estimatewithMBAR(u_kln, N_k, P.relative_tolerance)

    # Print the summary table; 'XXXXXX' marks the entries for which no estimate exists.
    print("""\n The free energies %s evaluated by using the trajectory
snaphots corresponding to various time intervals for both the
reverse and forward (in parentheses) direction.\n""" % P.units)
    print("%s\n %20s %19s %20s\n%s" % (70*'-', 'Time interval, ps','Reverse', 'Forward', 70*'-'))
    print("%10s -- %s\n%10s -- %-10s %11.3f +- %5.3f %16s\n" % (ts[0], ts[-1], '('+ts[0], ts[0]+')', R_df[0], R_ddf[0], 'XXXXXX'))
    for i in range(1, len(ts)-1):
        print("%10s -- %s\n%10s -- %-10s %11.3f +- %5.3f %11.3f +- %5.3f\n" % (ts[i], ts[-1], '('+ts[0], ts[i]+')', R_df[i], R_ddf[i], F_df[i-1], F_ddf[i-1]))
    print("%10s -- %s\n%10s -- %-10s %16s %15.3f +- %5.3f\n%s" % (ts[-1], ts[-1], '('+ts[0], ts[-1]+')', 'XXXXXX', F_df[-1], F_ddf[-1], 70*'-'))

    # Plot the forward and reverse dF(t); store the data points in the text file.
    print("Plotting data to the file dF_t.pdf...\n\n")
    plotdFvsTime([float(i) for i in ts[1:]], [float(i) for i in ts[:-1]], F_df, R_df, F_ddf, R_ddf)
    outtext = ["%12s %10s %-10s %17s %10s %s\n" % ('Time (ps)', 'Forward', P.units, 'Time (ps)', 'Reverse', P.units)]
    outtext+= ["%10s %11.3f +- %5.3f %18s %11.3f +- %5.3f\n" % (ts[1:][i], F_df[i], F_ddf[i], ts[:-1][i], R_df[i], R_ddf[i]) for i in range(len(F_df))]
    # The context manager closes the file even if writing fails.
    with open(os.path.join(P.output_directory, 'dF_t.txt'), 'w') as outfile:
        outfile.writelines(outtext)
    return
#===================================================================================================
# FUNCTIONS: Free energy change breakdown (into lambda-pair dFs). Called by the -g flag.
#===================================================================================================
def plotdFvsLambda():
    """Plot the free energy breakdown over adjacent state pairs and, for TI methods, the dU/dlambda curves.

    Figures are saved into P.output_directory:
      'dF_state_long.pdf' -- all state pairs in a single bar chart;
      'dF_state.pdf'      -- the same data wrapped over several subplots
                             (skipped when there are fewer pairs than bars per subplot);
      'dhdl_TI.pdf'       -- only when 'TI' or 'TI-CUBIC' is among P.methods.
    """

    # Bar colour of each estimator; shared by the two bar-chart helpers.
    method_colors = {'TI':'#C45AEC', 'TI-CUBIC':'#33CC33', 'DEXP':'#F87431', 'IEXP':'#FF3030', 'GINS':'#EAC117', 'GDEL':'#347235', 'BAR':'#6698FF', 'UBAR':'#817339', 'RBAR':'#C11B17', 'MBAR':'#F9B7FF'}

    def plotdFvsLambda1():
        """Plots the free energy differences evaluated for each pair of adjacent states for all methods."""
        x = numpy.arange(len(df_allk))
        # The figure gets wider once there are more than eight state pairs.
        if x[-1]<8:
            fig = pl.figure(figsize = (8,6))
        else:
            fig = pl.figure(figsize = (len(x),6))
        width = 1./(len(P.methods)+1)
        elw = 30*width
        handles = []
        # One group of bars per state pair; each method is shifted by one bar width.
        for rank, name in enumerate(P.methods):
            y = [df_allk[i][name]/P.beta_report for i in x]
            ye = [ddf_allk[i][name]/P.beta_report for i in x]
            bars = pl.bar(x+rank*width, y, width, color=method_colors[name], yerr=ye, lw=0.1*elw, error_kw=dict(elinewidth=elw, ecolor='black', capsize=0.5*elw))
            handles.append(bars[0])
        pl.xlabel('States', fontsize=12, color='#151B54')
        pl.ylabel(r'$\Delta G$ '+P.units, fontsize=12, color='#151B54')
        pl.xticks(x+0.5*width*len(P.methods), tuple(['%d--%d' % (i, i+1) for i in x]), fontsize=8)
        pl.yticks(fontsize=8)
        pl.xlim(x[0], x[-1]+len(handles)*width)
        ax = pl.gca()
        for side in ['right', 'top', 'bottom']:
            ax.spines[side].set_color('none')
        ax.yaxis.set_ticks_position('left')
        for tick in ax.get_xticklines():
            tick.set_visible(False)
        leg = pl.legend(tuple(handles), tuple(P.methods), loc=3, ncol=2, prop=FP(size=10), fancybox=True)
        leg.get_frame().set_alpha(0.5)
        pl.title('The free energy change breakdown', fontsize = 12)
        pl.savefig(os.path.join(P.output_directory, 'dF_state_long.pdf'), bbox_inches='tight')
        pl.close(fig)
        return

    def plotdFvsLambda2(nb=10):
        """Plots the free energy differences evaluated for each pair of adjacent states for all methods.
        The layout is approximately 'nb' bars per subplot."""
        x = numpy.arange(len(df_allk))
        if len(x) < nb:
            return
        # Floor division keeps the number of subplots an integer.
        xs = numpy.array_split(x, len(x)//nb+1)
        mnb = max([len(i) for i in xs])
        fig = pl.figure(figsize = (8,6))
        width = 1./(len(P.methods)+1)
        elw = 30*width
        for ndx, chunk in enumerate(xs, 1):
            handles = []
            ax = pl.subplot(len(xs), 1, ndx)
            for rank, name in enumerate(P.methods):
                y = [df_allk[i][name]/P.beta_report for i in chunk]
                ye = [ddf_allk[i][name]/P.beta_report for i in chunk]
                bars = pl.bar(chunk+rank*width, y, width, color=method_colors[name], yerr=ye, lw=0.05*elw, error_kw=dict(elinewidth=elw, ecolor='black', capsize=0.5*elw))
                handles.append(bars[0])
            # Keep the ticks on the left spine only and hide the other three spines.
            ax.yaxis.set_ticks_position('left')
            for side in ['right', 'top', 'bottom']:
                ax.spines[side].set_color('none')
            pl.yticks(fontsize=10)
            ax.xaxis.set_ticks([])
            # Label every group of bars below the axis ('%d' truncates the shifted position).
            for i in chunk+0.5*width*len(P.methods):
                ax.annotate(r'$\mathrm{%d-%d}$' % (i, i+1), xy=(i, 0), xycoords=('data', 'axes fraction'), xytext=(0, -2), size=10, textcoords='offset points', va='top', ha='center')
            # Pad the shorter subplots so that the bars have the same width everywhere.
            pl.xlim(chunk[0], chunk[-1]+len(handles)*width + (mnb - len(chunk)))
        # The legend is attached to the last subplot.
        leg = ax.legend(tuple(handles), tuple(P.methods), loc=0, ncol=2, prop=FP(size=8), title=r'$\mathrm{\Delta G\/%s\/}\mathit{vs.}\/\mathrm{lambda\/pair}$' % P.units, fancybox=True)
        leg.get_frame().set_alpha(0.5)
        pl.savefig(os.path.join(P.output_directory, 'dF_state.pdf'), bbox_inches='tight')
        pl.close(fig)
        return

    def plotTI():
        """Plots the ave_dhdl array as a function of the lambda value.
        If (TI and TI-CUBIC in methods) -- plots the TI integration area and the TI-CUBIC interpolation curve,
        elif (only one of them in methods) -- plots the integration area of the method."""
        fig = pl.figure(figsize = (8,6))
        ax = fig.add_subplot(1,1,1)
        ax.spines['bottom'].set_position('zero')
        ax.spines['top'].set_color('none')
        ax.spines['right'].set_color('none')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
        for spine in ax.spines.values():
            spine.set_zorder(12.2)

        xs, ndx, dx = [0], 0, 0.001
        colors = ['r', 'g', '#7F38EC', '#9F000F', 'b', 'y']
        min_y, max_y = 0, 0
        ## lv_names2 = [r'$Coulomb$', r'$vdWaals$'] ## for the paper
        lv_names2 = [r'$%s$' % string_i.capitalize() for string_i in P.lv_names]

        for j in range(n_components):
            y = ave_dhdl[:,j]
            if (y == 0).all():
                # Nothing to draw for a component whose averages are all zero.
                continue
            # Get the coordinates.
            lj = lchange[:,j]
            x = lv[:,j][lj]
            y = y[lj]/P.beta_report

            if 'TI' in P.methods:
                # Plot the TI integration area; neighbouring trapezoids alternate in opacity.
                if len(x) > 1:
                    min_y = min(y.min(), min_y)
                    max_y = max(y.max(), max_y)
                for i in range(len(x)-1):
                    alpha = 1.0 if i%2==0 else 0.5
                    pl.fill_between(x[i:i+2]+ndx, 0, y[i:i+2], color=colors[ndx], alpha=alpha)
                # Off-screen line that only provides the legend entry of the component.
                xlegend = [-100*wnum for wnum in range(len(lv_names2))]
                pl.plot(xlegend, [0*wnum for wnum in xlegend], ls='-', color=colors[ndx], label=lv_names2[ndx]) ## for the paper
                if 'TI-CUBIC' in P.methods:
                    # Plot the TI-CUBIC interpolation curve.
                    xnew = numpy.arange(0, 1+dx, dx)
                    ynew = cubspl[j].interpolate(y, xnew)
                    min_y = min(ynew.min(), min_y)
                    max_y = max(ynew.max(), max_y)
                    pl.plot(xnew+ndx, ynew, color='#B6B6B4', ls ='-', solid_capstyle='round', lw=3.0)
            else:
                # Plot the TI-CUBIC integration area.
                for i in range(len(x)-1):
                    xnew = numpy.arange(x[i], x[i+1]+dx, dx)
                    ynew = cubspl[j].interpolate(y, xnew)
                    # Pin the end points of the segment to the data values.
                    ynew[0], ynew[-1] = y[i], y[i+1]
                    min_y = min(ynew.min(), min_y)
                    max_y = max(ynew.max(), max_y)
                    alpha = 1.0 if i%2==0 else 0.5
                    pl.fill_between(xnew+ndx, 0, ynew, color=colors[ndx], alpha=alpha)

            # Store the abscissa values and update the subplot index.
            xs += (x+ndx).tolist()[1:]
            ndx += 1

        # Make sure the tick labels are not overcrowded.
        xs = numpy.array(xs)
        dl_mat = numpy.array([xs-i for i in xs])   # dl_mat[i,j] = xs[j] - xs[i]
        ri = range(len(xs))

        def getInd(r, z):
            """Extend z with the indices of the abscissas that keep their tick label.

            Starting from r[0], the first abscissa lying more than the threshold
            ndx*0.02 (doubled once r[0] exceeds 10) away is appended to z and
            the search restarts from it; z is returned when no abscissa is far
            enough anymore."""
            primo = r[0]
            min_dl = ndx*0.02*2**(primo>10)
            if dl_mat[primo].max()<min_dl:
                return z
            for i in r:
                for j in range(len(xs)):
                    if dl_mat[i,j]>min_dl:
                        z.append(j)
                        return getInd(ri[j:], z)
            # No spacing exceeds the threshold: keep what has been collected so far.
            return z

        # Evaluate the selection once, starting from a fresh list.
        labelled = set(getInd(ri, [0]))
        xt = [i if i in labelled else '' for i in range(K)]
        pl.xticks(xs[1:], xt[1:], fontsize=10)
        pl.yticks(fontsize=10)

        # Remove the abscissa ticks and set up the axes limits.
        for tick in ax.get_xticklines():
            tick.set_visible(False)
        pl.xlim(0, ndx)
        min_y *= 1.01
        max_y *= 1.01
        pl.ylim(min_y, max_y)

        # Write the lambda value next to every abscissa that kept its label.
        for i,j in zip(xs[1:], xt[1:]):
            pl.annotate(('%.2f' % (i-1.0 if i>1.0 else i) if not j=='' else ''), xy=(i, 0), xytext=(i, 0.01), size=10, rotation=90, textcoords=('data', 'axes fraction'), va='bottom', ha='center', color='#151B54')
        if ndx>1:
            # Add minor ticks when there are only a few major ones.
            lenticks = len(ax.get_ymajorticklabels()) - 1
            if min_y<0:
                lenticks -= 1
            if lenticks < 5:
                from matplotlib.ticker import AutoMinorLocator as AML
                ax.yaxis.set_minor_locator(AML())
        pl.grid(which='both', color='w', lw=0.25, axis='y', zorder=12)
        pl.ylabel(r'$\mathrm{\langle{\frac{ \partial U } { \partial \lambda }}\rangle_{\lambda}\/%s}$' % P.units, fontsize=20, color='#151B54')
        pl.annotate(r'$\mathit{\lambda}$', xy=(0, 0), xytext=(0.5, -0.05), size=18, textcoords='axes fraction', va='top', ha='center', color='#151B54')
        if not P.software.title()=='Sire':
            lege = ax.legend(prop=FP(size=14), frameon=False, loc=1)
            for l in lege.legendHandles:
                l.set_linewidth(10)
        pl.savefig(os.path.join(P.output_directory, 'dhdl_TI.pdf'))
        pl.close(fig)
        return

    print("Plotting the free energy breakdown figure...")
    plotdFvsLambda1()
    plotdFvsLambda2()
    if ('TI' in P.methods or 'TI-CUBIC' in P.methods):
        print("Plotting the TI figure...")
        plotTI()
#===================================================================================================
# FUNCTIONS: The Curve-Fitting Method. Called by the -c flag.
#===================================================================================================
def plotCFM(u_kln, N_k, num_bins=100):
"""A graphical representation of what Bennett calls 'Curve-Fitting Method'."""
print "Plotting the CFM figure..."
def leaveTicksOnlyOnThe(xdir, ydir, axis):
    """Put the x ticks of 'axis' on the 'xdir' side and its y ticks on the 'ydir' side.

    xdir and ydir are handed unchanged to the set_ticks_position() methods of
    axis.xaxis and axis.yaxis respectively.  Returns None."""
    axis.xaxis.set_ticks_position(xdir)
    axis.yaxis.set_ticks_position(ydir)
    return
def plotdg_vs_dU(yy, df_allk, ddf_allk):
    """Draw one panel per (x, y) pair of 'yy' on a grid and save the figure as 'cfm.pdf'.

    Each panel shows the curve itself, a band spanning the 'BAR' entry of
    df_allk[i] plus/minus the 'BAR' entry of ddf_allk[i], and a label made of
    the two state indices of the pair."""
    # Grid of about sqrt(n) rows; one more column when the fractional part of the root exceeds 0.5.
    root = (len(yy))**0.5
    nrows = int(root)
    ncols = nrows + 1 + int(root-nrows>0.5)
    # Beyond 13 panels the figure and the caption font are enlarged.
    if len(yy)>13:
        scale = round(ncols/3., 1)+0.4
    else:
        scale = 1
    font_bump = numpy.ceil(scale*3) if scale>1 else 0
    fig = pl.figure(figsize = (8*scale,6*scale))
    matplotlib.rc('axes', facecolor = '#E3E4FA', edgecolor = 'white')

    # State labels: when some lambda indices were skipped, drop them from the numbering.
    if not P.bSkipLambdaIndex:
        ks = range(K)
    else:
        skipped = [int(token) for token in P.bSkipLambdaIndex.split('-')]
        ks = numpy.delete(numpy.arange(K+len(skipped)), skipped)

    for panel, (xx_i, yy_i) in enumerate(yy):
        ax = pl.subplot(nrows, ncols, panel+1)
        ax.plot(xx_i, yy_i, color='r', ls='-', lw=3, marker='o', mec='r')
        leaveTicksOnlyOnThe('bottom', 'left', ax)
        ax.locator_params(axis='x', nbins=5)
        ax.locator_params(axis='y', nbins=6)
        centre = df_allk[panel]['BAR']
        spread = ddf_allk[panel]['BAR']
        ax.fill_between(xx_i, centre - spread, centre + spread, color='#D2B9D3', zorder=-1)
        ax.annotate(r'$\mathrm{%d-%d}$' % (ks[panel], ks[panel+1]), xy=(0.5, 0.9), xycoords=('axes fraction', 'axes fraction'), xytext=(0, -2), size=14, textcoords='offset points', va='top', ha='center', color='#151B54', bbox = dict(fc='w', ec='none', boxstyle='round', alpha=0.5))
        pl.xlim(xx_i.min(), xx_i.max())

    # Captions shared by all the panels, positioned in figure coordinates.
    caption_size = 20+font_bump
    pl.annotate(r'$\mathrm{\Delta U_{i,i+1}\/(reduced\/units)}$', xy=(0.5, 0.03), xytext=(0.5, 0), xycoords=('figure fraction', 'figure fraction'), size=caption_size, textcoords='offset points', va='center', ha='center', color='#151B54')
    pl.annotate(r'$\mathrm{\Delta g_{i+1,i}\/(reduced\/units)}$', xy=(0.06, 0.5), xytext=(0, 0.5), rotation=90, xycoords=('figure fraction', 'figure fraction'), size=caption_size, textcoords='offset points', va='center', ha='center', color='#151B54')
    pl.savefig(os.path.join(P.output_directory, 'cfm.pdf'))
    pl.close(fig)
    return
def findOptimalMinMax(ar):
c = zip(*numpy.histogram(ar, bins=10))
thr = int(ar.size/8.)
mi, ma = ar.min(), ar.max()
for (i,j) in c:
if i>thr:
mi = j
break