-
Notifications
You must be signed in to change notification settings - Fork 0
/
make_wugb_json.py
executable file
·148 lines (116 loc) · 4.12 KB
/
make_wugb_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3
"""
This script scans a web server's directory (based on `servers` dict),
and saves a JSON file that includes all supported data files in this
directory.
The output JSON file can be loaded directly into Washington University
Genome Browser (WUGB).
Two arguments are required by the script:
* genome type, such as `hg19` or `hg38`
* directory name
Two new files will be generated in the input directory:
* `wugb.json`: JSON file for WUGB
* `wugb_url.txt`: the URL of WUGB (also shown at the end of this program)
"""
import json
import os
import socket
import sys
# Per-host web server settings, keyed by the machine's hostname.
#   www_root: directory on disk whose content is published by the web server
#   main_url: public base URL that corresponds to `www_root`
servers = dict(
    plutus=dict(
        www_root="/mnt/data1/www/html/",
        main_url="https://plutus.faryabilab.com/",
    ),
    simurgh=dict(
        www_root="/mnt/data0/www/html/",
        main_url="https://simurgh.faryabilab.com/",
    ),
)

# File extension (including the leading dot) -> WashU Genome Browser track
# type. Files with any other extension are ignored by the scan.
data_types = dict(
    [
        (".bw", "bigwig"),
        (".cool", "cool"),
        (".hic", "hic"),
    ]
)

# Base address of the WashU Genome Browser (WUGB)
WUGB_URL = 'https://epigenomegateway.wustl.edu/browser/'

# Names of the two files written into the scanned data directory
JSON_FILENAME, URL_FILENAME = 'wugb.json', 'wugb_url.txt'
def syntax():
    """
    Print a short usage message (command syntax plus one example) on stdout.
    """
    usage_lines = (
        "Syntax:",
        # The embedded newline leaves a blank line before the example section.
        " make_wugb_json.py [genome_type] [data_directory]\n",
        "For example:",
        " make_wugb_json.py hg38 my_data",
    )
    for usage_line in usage_lines:
        print(usage_line)
def chk_data_dir(dir_name, data_root):
    """
    Check whether:
    * `dir_name` is a directory
    * `dir_name` is a sub-directory of `data_root`

    The containment test compares whole path components, so a sibling
    directory that merely shares a name prefix with `data_root`
    (e.g. `/x/html_private` vs. `/x/html`) is rejected, whether or not
    `data_root` ends with a path separator. `data_root` itself is not
    accepted; only directories located below it are.

    On failure an error message is printed and the program exits with
    status 2 (not a directory) or 3 (not located in `data_root`).

    Return the absolute path of `dir_name`.
    """
    abs_path = os.path.abspath(dir_name)
    if not os.path.isdir(abs_path):
        print(f"ERROR: '{dir_name}' is not a directory")
        sys.exit(2)

    # Normalize the root as well (this also drops any trailing separator) so
    # that both paths are compared in the same form.
    abs_root = os.path.abspath(data_root)
    try:
        is_inside = (
            abs_path != abs_root
            and os.path.commonpath([abs_path, abs_root]) == abs_root
        )
    except ValueError:
        # commonpath() raises when the paths cannot be compared at all
        # (e.g. they are on different drives): treat as "not inside".
        is_inside = False

    if not is_inside:
        print(f"ERROR: '{dir_name}' is not located in '{data_root}'")
        sys.exit(3)

    return abs_path
# Main
def make_hub_entry(file_name, track_type, data_url):
    """
    Build the WUGB data hub entry (a dict) for one data file.

    * `file_name`: base name of the data file; the text before its first
      dot becomes the track name
    * `track_type`: WUGB track type, i.e. a value of `data_types`
    * `data_url`: public URL of the data file

    Return the hub entry. Entries of type `cool` or `hic` additionally get
    an `options` dict that selects the `arc` display mode.
    """
    hub_entry = {
        "type": track_type,
        "url": data_url,
        "name": file_name.split('.')[0],
        "showOnHubLoad": True,
    }

    # Special options for `cool` and `hic` data types, see:
    # https://epigenomegateway.readthedocs.io/en/latest/datahub.html#example-hic-track
    # The test is on the track type (no dot), not on the file extension.
    if track_type in ('cool', 'hic'):
        hub_entry['options'] = {
            'displayMode': 'arc',
        }

    return hub_entry


def main():
    """
    Scan the data directory given on the command line and write
    `wugb.json` and `wugb_url.txt` into it.

    Exits with status 1 on wrong usage and 3 when the current host is not
    listed in `servers`; `chk_data_dir` exits on an invalid directory.
    """
    if len(sys.argv) != 3:
        syntax()
        sys.exit(1)

    # Exit if the server is not found in `servers`.
    # gethostname() may return a dotted (fully qualified) name, while
    # `servers` is keyed by short names, so only the first label is used.
    host_name = socket.gethostname()
    server_name = host_name.split('.')[0]
    if server_name not in servers:
        print(f"ERROR: '{host_name}' not supported")
        sys.exit(3)

    www_root = servers[server_name]['www_root']
    main_url = servers[server_name]['main_url']

    genome_type = sys.argv[1]
    data_dir = sys.argv[2]
    abs_data_dir = chk_data_dir(data_dir, www_root)

    wugb_hub = []
    for root, dirs, files in os.walk(abs_data_dir):
        # Sort in place so that os.walk() visits sub-directories in a stable
        # order; together with sorted(files) this makes the JSON reproducible.
        dirs.sort()

        # Make sure `root` is accessible by all users
        os.chmod(root, 0o755)

        for f in sorted(files):
            # Skip the files whose types are not supported
            _, f_type = os.path.splitext(f)
            if f_type not in data_types:
                continue

            abs_file_path = os.path.join(root, f)
            sub_url = os.path.relpath(abs_file_path, start=www_root)
            data_url = main_url + sub_url

            # add entry to hub
            wugb_hub.append(make_hub_entry(f, data_types[f_type], data_url))

    # Create `wugb.json` in `data_dir`
    json_path = os.path.join(abs_data_dir, JSON_FILENAME)
    with open(json_path, "w", encoding="utf-8") as ofh:
        json.dump(wugb_hub, ofh, indent=2)
        ofh.write("\n")  # end the JSON file with a newline character

    json_url = main_url + os.path.relpath(json_path, start=www_root)

    # Create `wugb_url.txt` in `data_dir`:
    url_path = os.path.join(abs_data_dir, URL_FILENAME)
    url_str = f"{WUGB_URL}?genome={genome_type}&hub={json_url}"
    with open(url_path, "w", encoding="utf-8") as ofh:
        ofh.write(f"{url_str}\n")

    # Print `json_path` and `url_str` on stdout
    print(f"JSON file is saved as: {json_path}")
    print(f"View it at: {url_str}")
    print(f"URL is saved as: {url_path}")


if __name__ == '__main__':
    main()