-
Notifications
You must be signed in to change notification settings - Fork 1
/
init.py
46 lines (38 loc) · 1.52 KB
/
init.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os
from collections import defaultdict
import pandas as pd
import argparse
def add_sample(sample_dict, sample_id, header, path):
    """Store ``path`` under column ``header`` for ``sample_id``.

    ``sample_dict`` is updated in place and maps each sample ID to a
    ``{header: path}`` dictionary.

    Args:
        sample_dict: Mutable mapping of sample ID -> per-sample dictionary.
        sample_id: Key identifying the sample.
        header: Column name to store the path under.
        path: Value recorded for ``header``.
    """
    # setdefault creates the per-sample dictionary on first use, so a plain
    # dict works just as well as a defaultdict(dict).
    sample_dict.setdefault(sample_id, {})[header] = path
def get_samples(path, dir_str):
    """Build a table of the sample directories found beneath ``path``.

    The tree rooted at ``path`` is walked and every directory whose name
    contains ``dir_str`` is recorded. A sample ID has the form
    ``<run>_<dirname>``, where ``<run>`` is the second-to-last component of
    the matching directory's parent path (so that equally named directories
    from different runs stay distinct).

    Args:
        path: Root directory to search.
        dir_str: Substring that a directory name must contain to be recorded.

    Returns:
        pandas.DataFrame indexed by sample ID with a single ``fq_dir``
        column holding the absolute path of each matching directory. The
        column is present even when nothing matches.
    """
    import warnings  # local import: only needed to report ID collisions

    fq_dirs = {}
    for root, dirs, _files in os.walk(os.path.abspath(path)):
        # Sorting in place fixes both the order of this loop and the order in
        # which os.walk descends, making the resulting table reproducible.
        dirs.sort()
        for dirname in dirs:
            # Match on the directory name only; files are never inspected.
            if dir_str not in dirname:
                continue
            dir_path = os.path.join(root, dirname)
            run_name = root.split(os.sep)[-2]
            sample_barcode = run_name + "_" + dirname  # specify by run
            if sample_barcode in fq_dirs:
                # Two directories map to the same ID; the later one wins, as
                # before, but the overwrite is no longer silent.
                warnings.warn(
                    f"duplicate sample id {sample_barcode!r}: "
                    f"{fq_dirs[sample_barcode]!r} replaced by {dir_path!r}",
                    stacklevel=2,
                )
            fq_dirs[sample_barcode] = dir_path
    return pd.DataFrame(
        {"fq_dir": list(fq_dirs.values())},
        index=list(fq_dirs),
    )
def parse_arguments(argv=None):
    """Read arguments from the console.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads them from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``path``, ``dir_str`` and
        ``out``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            an unknown option is given.
    """
    parser = argparse.ArgumentParser(description="Note: generate samples.tsv")
    # --path and --out are mandatory: without them the script would only fail
    # later with an unhelpful TypeError.
    parser.add_argument("-p", "--path", required=True, help='path to raw data')
    parser.add_argument(
        "--dir_str",
        default='barcode',
        help='the shared string in sample dirnames',
    )
    parser.add_argument(
        "-o", "--out", required=True, help='path to working directory'
    )
    return parser.parse_args(argv)
def main():
    """Collect the sample directories and write them to ``samples.tsv``.

    Parses the command line, builds the sample table with ``get_samples``
    and writes it tab-separated into the output directory. The output
    directory is created if it does not exist; an empty or missing value
    falls back to the current directory.
    """
    args = parse_arguments()
    sample_dt = get_samples(args.path, args.dir_str)
    out_dir = args.out or os.curdir
    # to_csv does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)
    sample_dt.to_csv(os.path.join(out_dir, "samples.tsv"), sep="\t")
# Script entry point: nothing below runs when the module is imported.
if __name__ == "__main__":
    main()
    # NOTE(review): this prints every directory below the hard-coded relative
    # path "raw" (deepest first) and looks like leftover debugging output --
    # confirm whether it is still needed. It is kept under the guard so that
    # importing the module has no side effects.
    for root, dirs, _files in os.walk("raw", topdown=False):
        for name in dirs:
            print(os.path.join(root, name))