-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocess_data.jl
48 lines (43 loc) · 1.28 KB
/
preprocess_data.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
using ArgParse
using TrainPCGenomes
using Random
# Script entry point: the body below runs only when this file is executed
# directly (PROGRAM_FILE resolves to this file), not when it is `include`d.
if abspath(PROGRAM_FILE) == @__FILE__
    s = ArgParseSettings()
    # `@add_arg_table!` is the non-deprecated spelling of `@add_arg_table`
    # (it mutates `s`, hence the `!`).
    @add_arg_table! s begin
        "--datasetsname"
            help = "Dataset name"
            default = nothing
        "--data_dir"
            help = "Data directory"
            arg_type = String
            default = "./data1kg"
        "--num_k"
            help = "Number of k-fold cross validation"
            arg_type = Int64
            default = 5
        "--valid_percent"
            help = "Percentage of validation set splitted out from training set"
            arg_type = Float64
            default = 0.1
        "--seed"
            help = "Random seed"
            arg_type = Int64
            default = 1337
    end
    args = parse_args(ARGS, s)
    println(args)

    # Seed the global RNG; the same seed is also forwarded to `k_fold_cv` below.
    Random.seed!(args["seed"])

    # Optional VCF -> data conversion step, currently disabled:
    # for name in ["mhc", "chr5.sub"]
    #     println("Transforming dataset $name from vcf")
    #     transform_vcf_to_data(name;
    #         data_dir=args["data_dir"]);
    # end

    # Honor `--datasetsname` when it is given; previously the option was parsed
    # but ignored. Without it, fall back to the default dataset list
    # (other candidates that were listed but disabled: "mhc", "chr5.sub").
    requested = args["datasetsname"]
    datasets = isnothing(requested) ? ["805", "10K"] : [string(requested)]

    for name in datasets
        println("Spliting dataset $name")
        k_fold_cv(name;
            k=args["num_k"],
            seed=args["seed"],
            valid_percent=args["valid_percent"],
            data_dir=args["data_dir"])
    end
end