-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add blast.ml to lib/bioinfo #38
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
open Core | ||
open Bistro.Std | ||
open Bistro.EDSL | ||
open Bistro_bioinfo.Std | ||
|
||
type db = [`blast_db] directory | ||
let env = docker_image ~account:"pveber" ~name:"ncbi-blast" ~tag:"2.4.0" () | ||
|
||
let db_name = "db" | ||
|
||
(* [fastadb fa dbtype] builds a workflow, described as blast.makedb, that
   first runs [mkdir_p dest] and then calls makeblastdb inside the [env]
   docker image with:
   - [-in]     the dependency [fa];
   - [-dbtype] the value [dbtype], passed through unchanged;
   - [-out]    [dest // db_name], so the database files share the
               [db_name] prefix inside the output directory.
   NOTE(review): [dbtype] is presumably one of the values accepted by
   makeblastdb (e.g. nucl or prot) -- confirm against callers. *)
let fastadb fa dbtype = | ||
workflow ~descr:"blast.makedb" [ | ||
mkdir_p dest ; | ||
cmd ~env "makeblastdb" [ | ||
opt "-in" dep fa ; | ||
opt "-dbtype" ident dbtype ; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the type of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So we have to create the type ? In the .ml ? I don't really see how to call the function then. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually if you use polymorphic variants |
||
opt "-out" ident (dest // db_name) ; | ||
] ; | ||
] | ||
|
||
(* Basic blastn*) | ||
|
||
(* [blastn ?... ?threads db query out_name] builds a workflow, described
   as blastn, that requests [threads] processors through [~np] (4 when
   [?threads] is not given). It runs [mkdir_p dest] and then calls blastn
   inside the [env] docker image with:
   - [-db]    [dep db // db_name], i.e. the [db_name] prefix inside the
              database directory [db];
   - [-query] the dependency [query];
   - [-out]   [dest // out_name], a file named [out_name] inside the
              output directory;
   - one flag per optional argument, each wrapped in [option]
     (presumably emitted only when the argument is supplied -- confirm
     against the Bistro EDSL);
   - [-num_threads] the [np] token. *)
let blastn ?evalue ?word_size ?task ?gapopen ?gapextend ?penalty | ||
?reward ?outfmt ?perc_identity ?qcov_hsp_perc ?max_hsps ?max_target_seqs ?(threads = 4) db query out_name = (*See blastn documentation to know what options are*) | ||
workflow ~descr:"blastn" ~np:threads [ | ||
mkdir_p dest ; | ||
cmd "blastn" ~env [ | ||
opt "-db" ident (dep db // db_name) ; | ||
opt "-query" dep query ; | ||
opt "-out" ident (dest // out_name) ; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's the motivation for creating a directory here? Couldn't we give There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because it's already created in makedb ? Or do we have to create it in the main pipeline ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well yes, the same could be said about the database, I think in both cases you don't really need to create a directory and put the resulting file in it. The resulting file should be put at the location There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be more precise in think your wrapper could be simplified like this: opt "-db" dep db ;
opt "-out" ident dest ; of course the wrapper for |
||
option (opt "-evalue" float) evalue ; | ||
option (opt "-word_size" int) word_size ; | ||
option (opt "-task" string) task ; | ||
option (opt "-gapopen" int) gapopen ; | ||
option (opt "-gapextend" int) gapextend ; | ||
option (opt "-penalty" int) penalty ; | ||
option (opt "-reward" int) reward ; | ||
option (opt "-outfmt" string) outfmt ; | ||
option (opt "-perc_identity" float) perc_identity ; | ||
option (opt "-qcov_hsp_perc" float) qcov_hsp_perc ; | ||
option (opt "-max_hsps" int) max_hsps ; | ||
option (opt "-max_target_seqs" int) max_target_seqs ; | ||
opt "-num_threads" ident np ; | ||
] | ||
] | ||
|
||
(* [blastp ?... ?threads db query out_name] builds a workflow, described
   as blastp, that requests [threads] processors through [~np] (4 when
   [?threads] is not given). It runs [mkdir_p dest] and then calls blastp
   inside the [env] docker image with:
   - [-db]    [dep db // db_name];
   - [-query] the dependency [query];
   - [-out]   [dest // out_name];
   - one flag per optional argument, each wrapped in [option];
   - [-num_threads] the [np] token.
   The optional arguments are the same set as for [blastn].
   NOTE(review): [-penalty], [-reward] and [-perc_identity] look like
   nucleotide-search options -- confirm that blastp accepts them before
   relying on these arguments. *)
let blastp ?evalue ?word_size ?task ?gapopen ?gapextend ?penalty | ||
?reward ?outfmt ?perc_identity ?qcov_hsp_perc ?max_hsps ?max_target_seqs ?(threads = 4) db query out_name = (* See the blastp documentation for the meaning of each option *) | ||
workflow ~descr:"blastp" ~np:threads [ | ||
mkdir_p dest ; | ||
cmd "blastp" ~env [ | ||
opt "-db" ident (dep db // db_name) ; | ||
opt "-query" dep query ; | ||
opt "-out" ident (dest // out_name) ; | ||
option (opt "-evalue" float) evalue ; | ||
option (opt "-word_size" int) word_size ; | ||
option (opt "-task" string) task ; | ||
option (opt "-gapopen" int) gapopen ; | ||
option (opt "-gapextend" int) gapextend ; | ||
option (opt "-penalty" int) penalty ; | ||
option (opt "-reward" int) reward ; | ||
option (opt "-outfmt" string) outfmt ; | ||
option (opt "-perc_identity" float) perc_identity ; | ||
option (opt "-qcov_hsp_perc" float) qcov_hsp_perc ; | ||
option (opt "-max_hsps" int) max_hsps ; | ||
option (opt "-max_target_seqs" int) max_target_seqs ; | ||
opt "-num_threads" ident np ; | ||
] | ||
] | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
better call it `makedb`, since it's the name of the tool.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also, `dbtype` should be the first argument and named