-
Notifications
You must be signed in to change notification settings - Fork 7
/
main.py
executable file
·859 lines (775 loc) · 41.1 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
#!/usr/bin/env python
'''
E2EDNA 2.0 - OpenMM Implementation of E2EDNA !
An automated pipeline for simulating DNA aptamers complexed with target ligands (peptide, DNA, RNA or small molecules).
Copyright (C) 2022 Michael Kilgour, Tao Liu and Lena Simine
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
'''
import argparse
import glob
import os
import platform
import shutil
from pathlib import Path
import yaml
from opendna import *
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
parser = argparse.ArgumentParser(
description='E2EDNA: Simulate DNA aptamers complexed with target ligands',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
compute_info = parser.add_argument_group('Compute Platform Configuration')
paths = parser.add_argument_group('Directory Settings')
run_info = parser.add_argument_group('Run Parameters')
# =========================== YAML configuration file ===========================
parser.add_argument('-yaml',
'--yaml_config',
metavar='\b', # now to show metavar in usage message
type=str,
default='simu_config.yaml',
required=True,
help='A YAML configuration file that can specify all the arguments')
parser.add_argument('-ow',
'--overwrite',
action='store_true',
required=False,
help='Overwrite existing --run_num')
# =========================== Compute platform information ===========================
compute_info.add_argument('-d',
'--device',
metavar='\b',
type=str,
default='local',
help='Device configuration',
choices=['local', 'cluster'])
compute_info.add_argument('-os',
'--operating_system',
metavar='\b',
type=str,
default='macos',
help='Operating system',
choices=['macos', 'linux', 'WSL'])
compute_info.add_argument('-p',
'--platform',
metavar='\b',
type=str,
default='CPU',
help='Processing platform',
choices=['CPU', 'CUDA', 'OpenCL'])
compute_info.add_argument('--CUDA_precision',
metavar='\b',
type=str,
default='single',
help='Precision of CUDA or OpenCL, if used',
choices=['single', 'double', 'mixed'])
# =========================== Directory information ===========================
paths.add_argument('-w',
'--workdir',
metavar='DIR',
type=str,
default='./e2edna_runs',
help='Working directory to store individual output runs')
paths.add_argument('-mbdir',
'--mmb_dir',
metavar='\b',
type=str,
default=None,
# default='Installer*/lib',
help='MMB library directory')
paths.add_argument('-mb',
'--mmb',
metavar='\b',
type=str,
default=None,
# default='Installer*/bin/MMB*',
help='Path to MMB executable')
# =========================== Parameters to run pipeline ===========================
# "required" is set to be False as default, so that it is possible to only set it up in yaml config file
run_info.add_argument('--quick_check_mode',
metavar='\b',
type=str,
default='Yes',
help='Rapidly run a certain mode for quick check using default test parameters',
choices=['Yes','No'])
# run_num is a string
run_info.add_argument('-r',
'--run_num',
metavar='\b',
type=str,
default='1',
help='Run number. Output will be written to {--workdir}/run{--run_num}')
# ================ DNA aptamer and ligand info ================
run_info.add_argument('-m',
'--mode',
metavar='\b',
type=str,
help='Run mode',
choices=['2d structure', '3d coarse', '3d smooth',
'coarse dock', 'smooth dock', 'free aptamer',
'full dock', 'full binding'])
''' --mode:
'2d structure': ssString, pair list and probability
'3d coarse': MMB output, stressed structure, no solvent
'3d smooth': MMB output with short MD relaxation
'coarse dock': best docking scores on coarse MMB structure
'smooth dock': best docking scores on smoothed MMB structure
'free aptamer': evaluate and find representative 3D aptamer structure
'full dock': 'free aptamer' + docking
'full binding': 'full dock' + binding
'''
run_info.add_argument('-a',
'--aptamer_seq',
metavar='\b',
type=str,
help="DNA Aptamer sequence (5'->3')")
run_info.add_argument('-l',
'--ligand',
metavar='\b',
type=str,
default=None,
help="Name of PDB file for ligand structure; None if not to have ligand")
run_info.add_argument('-lt',
'--ligand_type',
metavar='\b',
type=str,
default=None,
help='Type of ligand molecule',
choices=['peptide', 'DNA', 'RNA', 'other'])
run_info.add_argument('-ls',
'--ligand_seq',
metavar='\b',
type=str,
default=None,
help='Ligand sequence if peptide, DNA, or RNA')
# Examples:
run_info.add_argument('--example_target_pdb',
metavar='\b',
type=str,
default='examples/example_peptide_ligand.pdb',
help='An example peptide ligand included in E2EDNA package: used when wish to test docking')
run_info.add_argument('--example_peptide_seq',
metavar='\b',
type=str,
default='YQTQTNSPRRAR',
help='The sequence of the example peptide ligand')
# ================ Pipeline module: NUPACK and MMB ================
# # NUPACK and MMB modules:
run_info.add_argument('--skip_MMB',
metavar='\b',
type=str,
default='No',
help="If `Yes`: skip both 2D structure analysis and MMB folding, and start with a known --init_structure",
choices=['Yes','No'])
run_info.add_argument('-init',
'--init_structure',
metavar='\b',
type=str,
default=None,
help='Name of PDB file if starting pipeline on a DNA aptamer with known structure')
run_info.add_argument('--secondary_structure_engine',
metavar='\b',
type=str,
default='NUPACK',
help='Pipeline module that is used to predict secondary structures')
run_info.add_argument('--ensemble',
metavar='\b',
type=str,
default='nostacking',
help='How NUPACK4 handles the stacking calculations',
choices=['nostacking', 'stacking', 'none-nupack3', 'some-nupack3', 'all-nupack3'])
run_info.add_argument('--N_2D_structures',
metavar='\b',
type=int,
default=1,
help='Number of predicted secondary structures')
run_info.add_argument('--Mg_conc',
metavar='\b',
type=float,
default=0.0,
help='Magnesium molar concentration used in NUPACK: [0, 0.2]')
run_info.add_argument('--fold_fidelity',
metavar='\b',
type=float,
default=0.9,
help="Refold in MMB if score < `fold_fidelity` unless the `fold_speed` is `quick`")
run_info.add_argument('--fold_speed',
metavar='\b',
type=str,
default='normal',
help='MMB folding speed',
choices=['quick','normal','slow'])
run_info.add_argument('--mmb_normal_template',
metavar='\b',
type=str,
default='lib/mmb/commands.template.dat',
help='Path to MMB folding protocol of normal speed')
run_info.add_argument('--mmb_quick_template',
metavar='\b',
type=str,
default='lib/mmb/commands.template_quick.dat',
help='Path to MMB folding protocol of quick speed')
run_info.add_argument('--mmb_slow_template',
metavar='\b',
type=str,
default='lib/mmb/commands.template_long.dat',
help='Path to MMB folding protocol of slow speed')
run_info.add_argument('--mmb_params',
metavar='\b',
type=str,
default='lib/mmb/parameters.csv',
help='Path to parameter file bundled with MMB package')
# ================ Pipeline module: OpenMM ================
# OpenMM module:
# # resume a MD sampling
run_info.add_argument('-pk',
'--pickup',
metavar='\b',
type=str,
default='No',
help='Whether the run is to resume MD sampling of an unfinished run or an old run',
choices=['Yes', 'No'])
run_info.add_argument('--pickup_from_freeAptamerChk',
metavar='\b',
type=str,
default='No',
help='Resume MD sampling of free aptamer: skip everything before it',
choices=['Yes','No'])
run_info.add_argument('--pickup_from_complexChk',
metavar='\b',
type=str,
default='No',
help='Resume MD sampling of aptamer-ligand: skip everything before it',
choices=['Yes','No'])
run_info.add_argument('--chk_file',
metavar='\b',
type=str,
default=None,
help='Name of checkpoint file for resuming MD sampling, format: <path>/<filename>.chk')
run_info.add_argument('--pickup_pdb',
metavar='\b',
type=str,
default=None,
help='PDB file (topology+coordinates) for resuming MD sampling in explicit solvent, format: <path>/<filename>.pdb')
# # Environmental parameters
run_info.add_argument('--pressure',
metavar='\b',
type=float,
default=1.0,
help='Pressure in the unit of atm')
run_info.add_argument('--temperature',
metavar='\b',
type=float,
default=298.0,
help='Temperature in Kelvin')
run_info.add_argument('--NaCl_molar',
metavar='\b',
type=float,
default=0.1,
help='Sodium molar concentration (could be used by NUPACK and OpenMM)') # could be used in implicit solvent as well
run_info.add_argument('--pH',
metavar='\b',
type=float,
default=7.4,
help='Could be used by OpenMM')
# # MD sampling: time and Langevin integrator
run_info.add_argument('--auto_sampling',
metavar='\b',
type=str,
default='No',
help="If `Yes`: run MD sampling till convergence, currently only feasible in free aptamer sampling",
choices=['Yes','No'])
run_info.add_argument('--autoMD_convergence_cutoff',
metavar='\b',
type=float,
default=1.0e-2,
help='Convergence cutoff if doing auto_sampling')
run_info.add_argument('--max_aptamer_sampling_iter',
metavar='\b',
type=int,
default=20,
help='Max number of iterations for free aptamer MD sampling if doing auto_sampling')
run_info.add_argument('--max_walltime',
metavar='\b',
type=float,
default=24.0,
help='Walltime in hours to check runtime')
run_info.add_argument('--skip_smoothing',
metavar='\b',
type=str,
default='Yes',
help=" If `Yes`: no short MD relaxation before MD sampling",
choices=['Yes','No'])
run_info.add_argument('--equilibration_time',
metavar='\b',
type=float,
default=0.1,
help='Equilibration time in nanoseconds after energy minimization and before MD sampling')
run_info.add_argument('--smoothing_time',
metavar='\b',
type=float,
default=None,
help='Time in nanoseconds for short MD relaxation before MD sampling, if any')
run_info.add_argument('--aptamer_sampling_time',
metavar='\b',
type=float,
default=None,
help='MD sampling time in nanoseconds for free aptamer dynamics')
run_info.add_argument('--complex_sampling_time',
metavar='\b',
type=float,
default=None,
help='MD sampling time in nanoseconds for aptamer-ligand complex dynamics')
run_info.add_argument('--time_step',
metavar='\b',
type=float,
default=2.0,
help='time step in femtoseconds in MD sampling')
run_info.add_argument('--print_step',
metavar='\b',
type=float,
default=10.0,
help='Printout step in picoseconds in MD sampling')
# # Create simulation system
run_info.add_argument('--force_field',
metavar='\b',
type=str,
default='amber14-all',
help='Force field used in OpenMM')
# amber14/protein.ff14SB, amber14/DNA.OL15,amber14/RNA.OL3, amber14/lipid17, etc.
# http://docs.openmm.org/latest/userguide/application/02_running_sims.html#force-fields
run_info.add_argument('--hydrogen_mass',
metavar='\b',
type=float,
default=1.5,
help='Unit is amu')
# # Default is to run simulation in explicit solvent
run_info.add_argument('--water_model',
metavar='\b',
type=str,
default='amber14/tip3p',
help='Explicit water solvent model used in OpenMM') # amber14/tip3pfb,amber14/tip4pew,amber14/spce,etc.
run_info.add_argument('--box_offset',
metavar='\b',
type=float,
default=1.0,
help='Buffering offset in nanometers on solvent box, if using explicit solvent')
run_info.add_argument('--constraints',
metavar='\b',
type=str,
default='None',
help="Specify which bond angles and/or lengths should be implemented with constraints",
choices=['None','HBonds','AllBonds','HAngles'])
run_info.add_argument('--constraint_tolerance',
metavar='\b',
type=float,
default=1.0e-6,
help='Distance tolerance for constraint in OpenMM integrator')
run_info.add_argument('--rigid_water',
metavar='\b',
type=str,
default='Yes',
help='Whether to make water molecules completely rigid at bond lengths and angles',
choices=['Yes','No'])
run_info.add_argument('--nonbonded_method',
metavar='\b',
type=str,
default='NoCutoff',
help='Type of nonbonded interactions',
choices=['Ewald','PME','LJPME','CutoffPeriodic','CutoffNonPeriodic','NoCutoff'])
# In implicit solvent: only 'CutoffPeriodic', 'CutoffNonPeriodic' or 'NoCutoff'
run_info.add_argument('--nonbonded_cutoff',
metavar='\b',
type=float,
default=1.0,
help='The cutoff distance in nanometers to use for nonbonded interactions')
run_info.add_argument('--ewald_error_tolerance',
metavar='\b',
type=float,
default=5.0e-4,
help="Error tolerance if `nonbonded_method` is `Ewald`, `PME`, or `LJPME`")
run_info.add_argument('--friction',
metavar='\b',
type=float,
default=1.0,
help='Friction coefficient in unit of 1/ps, used in Langevin integrator')
# # Option: using implicit solvent
run_info.add_argument('--implicit_solvent',
metavar='\b',
type=str,
default='No',
help='Whether to use an Amber GB implicit solvent model',
choices=['Yes','No'])
run_info.add_argument('--implicit_solvent_model',
metavar='\b',
type=str,
default=None,
help="Specify an Amber GB implicit solvent model if needed",
choices=['HCT','OBC1','OBC2','GBn','GBn2'])
run_info.add_argument('--soluteDielectric',
metavar='\b',
type=float,
default=1.0,
help='The solute dielectric constant to use in the implicit solvent model')
run_info.add_argument('--solventDielectric',
metavar='\b',
type=float,
default=78.5,
help='The solvent dielectric constant to use in the implicit solvent model')
run_info.add_argument('--implicit_solvent_Kappa',
metavar='\b',
type=float,
default=None,
help='Debye screening parameter; If specified by user, OpenMM will ignore {--NaCl_molar} in implicit solvent.')
run_info.add_argument('--leap_template',
metavar='\b',
type=str,
default='leap_template.in',
help='A script for running LEap program in Ambertools21, provided in E2EDNA package.')
run_info.add_argument('--DNA_force_field',
metavar='\b',
type=str,
default='DNA.OL15',
help='Force field for DNA used by LEap program',
choices=['DNA.OL15','DNA.bsc1'])
# ================ Pipeline module: LightDock ================
run_info.add_argument('--docking_steps',
metavar='\b',
type=int,
default=10,
help='Number of steps for docking simulations')
run_info.add_argument('--N_docked_structures',
metavar='\b',
type=int,
default=1,
help='Number of docked structures output from the docker')
# =========================== Above: finished reading arguments from command line input ===========================
params = parser.parse_args()
# Load up configuraton yaml file
if params.yaml_config is not None:
yaml_path = Path(params.yaml_config)
assert yaml_path.exists()
assert yaml_path.suffix in {".yaml", ".yml"}
with yaml_path.open("r") as f:
yaml_config = yaml.safe_load(f) # return a dict
# Convert from namespace to dictionary
params = vars(params)
# Merge: if any overlap, yaml file will overwrite command line inputs (especially the default values in params).
params.update(yaml_config)
# yaml file does not have to set up all arguments
# Convert 10 'Yes'/'No' binary arguments into boolean:
params['quick_check_mode'] = params['quick_check_mode']=='Yes'
# params['explicit_run_enumeration'] = params['explicit_run_enumeration']=='Yes'
params['skip_MMB'] = params['skip_MMB']=='Yes'
params['pickup'] = params['pickup']=='Yes'
params['pickup_from_freeAptamerChk'] = params['pickup_from_freeAptamerChk']=='Yes'
params['pickup_from_complexChk'] = params['pickup_from_complexChk']=='Yes'
params['auto_sampling'] = params['auto_sampling']=='Yes'
params['skip_smoothing'] = params['skip_smoothing']=='Yes'
params['rigid_water'] = params['rigid_water']=='Yes'
params['implicit_solvent'] = params['implicit_solvent']=='Yes'
# Check if the mode explicitly needs MD_smoothing:
if params['mode'] in ['3d smooth', 'smooth dock']:
params['skip_smoothing'] = False
# If to quickly check a certain mode, use simplistic settings for pipeline modules
if params['quick_check_mode'] is True:
# NUPACk and MMB
params['skip_MMB'] = False
params['secondary_structure_engine'] = 'NUPACK'
params['N_2D_structures'] = 1
params['Mg_conc'] = 0.005
params['fold_fidelity'] = 0.9
params['fold_speed'] = 'quick'
params['init_structure'] = None
# OpenMM
params['pickup'] = False
params['pickup_from_freeAptamerChk'] = False
params['pickup_from_complexChk'] = False
params['chk_file'] = None
params['pickup_pdb'] = None
params['auto_sampling'] = False
params['autoMD_convergence_cutoff'] = 1e-2 # not used if auto_sampling=False
params['max_aptamer_sampling_iter'] = 2 # not used if auto_sampling=False
params['equilibration_time'] = 0.0001 # ns. 50 steps
if params['mode'] in ['3d smooth', 'smooth dock']:
params['skip_smoothing'] = False
params['smoothing_time'] = 0.001 # ns. 500 steps
else:
params['skip_smoothing'] = True
params['smoothing_time'] = None
params['aptamer_sampling_time'] = 0.002 # ns. 1000 steps
if params['mode'] == 'full binding':
params['complex_sampling_time'] = 0.002 # ns. 1000 steps
params['time_step'] = 2.0 # fs
params['print_step'] = 0.05 # ps. print out every 25 steps
params['force_field'] = 'amber14-all'
params['hydrogen_mass'] = 1.5
params['water_model'] = 'amber14/tip3p'
params['box_offset'] = 1.0
params['constraints'] = 'HBonds'
params['constraint_tolerance'] = 1.0e-6
params['rigid_water'] = True
params['nonbonded_method'] = 'PME'
params['nonbonded_cutoff'] = 1.0
params['ewald_error_tolerance'] = 5.0e-4
params['friction'] = 1.0
params['implicit_solvent'] = False
params['implicit_solvent_model'] = None
params['soluteDielectric'] = None
params['solventDielectric'] = None
params['implicit_solvent_Kappa'] = None
params['leap_template'] = None
params['DNA_force_field'] = None
# LightDock
params['docking_steps'] = 10
params['N_docked_structures'] = 1
# =========================== Verify the input arguments from yaml file and command line ===========================
if params['device'] in ['local', 'cluster']:
if params['operating_system'] in ['macos', 'linux', 'WSL']:
if params['platform'] in ['CPU', 'CUDA', 'OpenCL']:
if params['platform'] == 'CUDA' or params['platform'] == 'OpenCL':
if not params['CUDA_precision'] in ['single', 'double', 'mixed']:
parser.error(f"Invalid value: --CUDA_precision={params['CUDA_precision']}. Must be from [single, double, mixed].")
else:
parser.error(f"Invalid value: --platform={params['platform']}. Must be from [CPU, CUDA, OpenCL].")
else:
parser.error(f"Invalid value: --operating_system={params['operating_system']}. Must be from [macos, linux, WSL].")
else:
parser.error(f"Invalid value: --device={params['device']}. Must be from [local, cluster].")
# Check if workdir exists, and create if not
if not os.path.isdir(params['workdir']):
os.mkdir(params['workdir'])
# Check if output directory for run exists;
# Either fail or force overwrite if so
out_dir = os.path.join(params['workdir'], 'run' + params['run_num']) #{workdir}/run{run_num}
if os.path.isdir(out_dir):
if params['overwrite']:
shutil.rmtree(out_dir) # Delete entire directory tree
else: # even if --pickup is True, still do it in a new output directory
parser.error(f"--run_num already exists at {out_dir}\n"
'\t To overwrite: add -ow/--overwrite flag in command line.')
if not params['mode'] in ['2d structure',
'3d coarse',
'3d smooth',
'coarse dock',
'smooth dock',
'free aptamer',
'full dock',
'full binding']:
parser.error(f"Invalid value: --mode={params['mode']}\n"
"\t Must be from ['2d structure','3d coarse','3d smooth','coarse dock','smooth dock','free aptamer','full dock','full binding'].")
# --aptamer_seq is always required
# Read in aptamer sequence if it is in a file
if (params['aptamer_seq'] is None):
parser.error(f"--aptamer_seq is required, even if an initial DNA structure is available.")
elif not isinstance(params['aptamer_seq'], str):
parser.error(f"--aptamer_seq should be of string type.")
# so aptamer_seq is a string, could be a file name
elif os.path.isfile(params['aptamer_seq']):
params['aptamer_seq'] = open(params['aptamer_seq']).read().rstrip()
# At this point, params['aptamer_seq'] must be a string of actual sequence, check if it is only made of 'A','G','C','T'
if not set(params['aptamer_seq']).issubset({'A','G','C','T'}):
parser.error(f"--aptamer_seq='{params['aptamer_seq']}' is made of letters other than {{'A','G','C','T'}} (case senstive).")
# Sometimes --ligand could be None, except for:
if (params['mode'] in ['coarse dock','smooth dock','full dock']) or ((params['mode'] == 'full binding') and (params['pickup_from_complexChk'] is False)):
if params['ligand'] is None:
parser.error(f"--ligand is required in the current setting.\n"
"\t An example peptide ligand is provided at 'examples/example_peptide_ligand.pdb'.")
# Sometimes --ligand should be None:
# including: 'full binding' but to resume complex dynamics which does not need ligand's pdb
if (params['mode'] in ['2d structure','3d coarse','3d smooth','free aptamer']) or ((params['mode'] == 'full binding') and (params['pickup_from_complexChk'] is True)):
if params['ligand'] is not None:
parser.error(f"--ligand is not required in the current setting and should be None.")
# When --ligand is None, then:
if params['ligand'] is None:
# ligand_type and ligand_seq should not be used
if params['ligand_type'] is not None: parser.error('--ligand_type should be None if --ligand is None.')
if params['ligand_seq'] is not None: parser.error('--ligand_seq should be None if --ligand is None.')
# If ligand is not None: it should be a pdb file and --ligand_type is required
else:
if not os.path.isfile(params['ligand']):
parser.error(f"Invalid --ligand={params['ligand']}. Must be either None or name of a valid PDB file.")
if not params['ligand_type'] in ['peptide', 'DNA', 'RNA', 'other']:
parser.error(f"Invalid value: --ligand_type={params['ligand_type']}. Must be from [peptide, DNA, RNA, other].")
else:
# If ligand_type is peptide, DNA, or RNA, its sequence is required
if (params['ligand_type'] in ['peptide', 'DNA', 'RNA']):
if params['ligand_seq'] is None:
parser.error(f"--ligand_seq is required if --ligand_type={params['ligand_type']}")
else:
# Read ligand sequence if it is in a file
if os.path.isfile(params['ligand_seq']):
params['ligand_seq'] = open(params['ligand_seq']).read().rstrip()
# if it is just predicting 2dSS, skip_MMB should not do anything regardless of its value
# if it is to resume MD sampling, skip_MMB should not do anything and it will be forcefully set as False later.
if (params['mode'] != '2d structure') and (params['pickup'] is False):
if params['skip_MMB'] is False:
if params['init_structure'] is not None:
parser.error(f"--init_structure is required only if --skip_MMB='Yes'.")
if (params['mmb_dir'] is None) or (params['mmb'] is None):
parser.error(f"--mmb_dir and --mmb are required for MMB to run.")
# Try to find MMB library directory
if not glob.glob(params['mmb_dir']):
parser.error(f"MMB library could not be found at {params['mmb_dir']}.")
else:
found_path = glob.glob(params['mmb_dir'])[0]
if os.getcwd() in found_path: # Already absolute path
params['mmb_dir'] = found_path
else: # Need to make absolute path
params['mmb_dir'] = os.path.join(os.getcwd(), found_path)
# Try to find MMB executable
if not glob.glob(params['mmb']):
parser.error(f"MMB executable could not be found at {params['mmb']}.")
else:
found_path = glob.glob(params['mmb'])[0]
if os.getcwd() in found_path: # Already absolute path
params['mmb'] = found_path
else: # Need to make absolute path
params['mmb'] = os.path.join(os.getcwd(), found_path)
# If skip_MMB: make sure init_structure is provided, even if we are about to resume a MD sampling (in which case, keep --skip_MMB as default 'No')
else:
if params['init_structure'] is None:
parser.error(f"--init_structure is required if --skip_MMB='Yes'.")
# erase input arguments about NUPACK: no longer useful
params['secondary_structure_engine'] = None
# If OpenMM is required:
if params['mode'] in ['3d smooth','smooth dock','free aptamer','full dock','full binding']:
if params['pickup'] is True:
# Reset these boolean arguments to avoid conflict
params['skip_MMB'] = False # and not need --init_structure
params['auto_sampling'] = False
params['skip_smoothing'] = True
params['implicit_solvent'] = False
# only one MD step is resumed
if (params['pickup_from_freeAptamerChk'] is True) and (params['pickup_from_complexChk'] is True):
parser.error(f"--pickup_from_freeAptamerChk and --pickup_from_complexChk cannot be 'Yes' together if --pickup='Yes'.")
if (params['pickup_from_freeAptamerChk'] is False) and (params['pickup_from_complexChk'] is False):
parser.error(f"One of --pickup_from_freeAptamerChk and --pickup_from_complexChk should be 'Yes' if --pickup='Yes'.")
# --chk_file cannot be None
if params['chk_file'] is None:
parser.error(f"--chk_file is required if --pickup='Yes'.")
# Try to find the checkpoint file
else:
if not glob.glob(params['chk_file']):
parser.error(f"The checkpoint file could not be found: {params['chk_file']}.")
# pdb file cannot be None
if params['implicit_solvent'] is False:
if params['pickup_pdb'] is None:
parser.error(f"--pickup_pdb is required if --pickup='Yes' in explicit solvent.")
else:
parser.error(f"Resuming sampling is not currently supported in Amber implicit solvent.")
# If not to resume, all pickup flags should be 'No' and no file should be provided
else:
if (params['pickup_from_freeAptamerChk'] is True) or (params['pickup_from_complexChk'] is True):
parser.error(f"--pickup_from_freeAptamerChk and --pickup_from_complexChk should both be 'No' if --pickup='No'.")
if (params['chk_file'] is not None) or (params['pickup_pdb'] is not None):
parser.error(f"--chk_file and --pickup_pdb are not required (ie, should be Python None) if --pickup='No'.")
if params['auto_sampling'] is True:
if params['autoMD_convergence_cutoff'] is None: parser.error(f"--autoMD_convergence_cutoff is required if --auto_sampling='Yes'.")
if params['max_aptamer_sampling_iter'] is None: parser.error(f"--max_aptamer_sampling_iter is required if --auto_sampling='Yes'.")
if (params['skip_smoothing'] is False) and (params['smoothing_time'] is None):
parser.error(f"--smoothing_time is required if --skip_smoothing='No'.")
if ((params['mode'] in ['free aptamer','full dock','full binding']) and (params['pickup_from_complexChk'] is False)) or (params['pickup_from_freeAptamerChk'] is True):
if params['aptamer_sampling_time'] is None:
parser.error(f"--aptamer_sampling_time is required to run MD sampling of free aptamer.")
elif params['smoothing_time'] is not None:
parser.error(f"--smoothing_time is not required to run MD sampling of free aptamer.")
if (params['mode'] == 'full binding') or (params['pickup_from_complexChk'] is True):
if params['complex_sampling_time'] is None:
parser.error(f"--complex_sampling_time is required to run MD sampling of aptamer-ligand complex.")
elif params['smoothing_time'] is not None:
parser.error(f"--smoothing_time is not required to run MD sampling of aptamer-ligand complex.")
elif (params['pickup_from_complexChk'] is True) and (params['aptamer_sampling_time'] is not None):
parser.error(f"--aptamer_sampling_time is not required to resume a MD sampling of aptamer-ligand complex.")
# Convert some string variables to corresponding objects in OpenMM
if params['constraints'] == 'HBonds':
params['constraints'] = HBonds
elif params['constraints'] == 'HAngles':
params['constraints'] = HAngles
elif params['constraints'] == 'AllBonds':
params['constraints'] = AllBonds
elif params['constraints'] == 'None':
params['constraints'] = None
else:
parser.error(f"Invalid value: --constraints={params['constraints']}. Choose from [HBonds, AllBonds, HAngles, None]")
# Convert --nonbonded_method to corresponding objects in OpenMM
if params['implicit_solvent'] is False:
# In explicit solvent: more choices for nonbonded method
if params['nonbonded_method'] == 'NoCutoff':
params['nonbonded_method'] = NoCutoff
elif params['nonbonded_method'] == 'CutoffNonPeriodic':
params['nonbonded_method'] = CutoffNonPeriodic
elif params['nonbonded_method'] == 'CutoffPeriodic':
params['nonbonded_method'] = CutoffPeriodic
elif params['nonbonded_method'] == 'LJPME':
params['nonbonded_method'] = LJPME
elif params['nonbonded_method'] == 'Ewald':
params['nonbonded_method'] = Ewald
elif params['nonbonded_method'] == 'PME':
params['nonbonded_method'] = PME
# Particle Mesh Ewald: efficient full electrostatics method for
# use with periodic boundary conditions to calculate long-range interactions
else:
parser.error(f"Invalid value: --nonbonded_method={params['nonbonded_method']}. "
"Choose from [Ewald, PME, LJPME, CutoffPeriodic, CutoffNonPeriodic, NoCutoff] for explicit solvent.")
# implicit solvent is used
else:
params['implicit_solvent_salt_conc'] = params['NaCl_molar']
'''Salt conc in solvent: converted to debye length (kappa) using the provided temperature and solventDielectric
(see interfaces.py for detailed walk-through)'''
# Convert --implicit_solvent_model to corresponding objects in OpenMM
if params['implicit_solvent_model'] == 'HCT':
params['implicit_solvent_model'] = HCT
elif params['implicit_solvent_model'] == 'OBC1':
params['implicit_solvent_model'] = OBC1
elif params['implicit_solvent_model'] == 'OBC2':
params['implicit_solvent_model'] = OBC2
elif params['implicit_solvent_model'] == 'GBn':
params['implicit_solvent_model'] = GBn
elif params['implicit_solvent_model'] == 'GBn2':
params['implicit_solvent_model'] = GBn2
else:
parser.error(f"Invalid value: --implicit_solvent_model={params['implicit_solvent_model']}. "
"Currently supporting: HCT, OBC1, OBC2, GBn, GBn2")
if params['leap_template'] is None:
parser.error(f"--leap_template is required if wish to use implicit solvent model.")
if (params['DNA_force_field'] != 'DNA.OL15') and (params['DNA_force_field'] != 'DNA.bsc1'):
parser.error(f"Invalid value: --DNA_force_field={params['DNA_force_field']}. "
"Choose from [DNA.OL15, DNA.bsc1].")
# Convert --nonbonded_method to corresponding objects in OpenMM
# In implicit solvent: only CutoffPeriodic or CutoffNonPeriodic or NoCutoff
if params['nonbonded_method'] == 'NoCutoff':
params['nonbonded_method'] = NoCutoff
elif params['nonbonded_method'] == 'CutoffNonPeriodic':
params['nonbonded_method'] = CutoffNonPeriodic
elif params['nonbonded_method'] == 'CutoffPeriodic':
params['nonbonded_method'] = CutoffPeriodic
else:
parser.error(f"Invalid value: --nonbonded_method={params['nonbonded_method']}. "
"Choose from [CutoffPeriodic, CutoffNonPeriodic or NoCutoff] for implicit solvent.")
''' When building a system in implicit solvent, no periodic boundary condition => cannot use Ewald, PME, or LJPME
=> only NoCutoff or CutoffNonPeriodic or CutoffPeriodic.
PBC is used to avoid surface effects in explicit solvent and implicit solvent does not have solvent boundaries (the continuum goes on forever).
'''
# =========================== Verification of input arguments from yaml file and command line: done ===========================
# Default Python scripts used in LightDock
params['ld_setup'] = 'lightdock3_setup.py'
params['ld_run'] = 'lightdock3.py'
params['lgd_generate'] = 'lgd_generate_conformations.py'
params['lgd_cluster'] = 'lgd_cluster_bsas.py'
params['lgd_rank'] = 'lgd_rank.py'
params['lgd_top'] = 'lgd_top.py'
# print(params)
# =========================== main function entrance ===========================
if __name__ == '__main__':
opendna = opendna(params) # instantiate the class
final_output_dict = opendna.run() # retrieve binding information (eventually this should become a normalized c-number)
printRecord('\nPipeline successfully stopped.\n')