From 83398b31bbd2d735e75e9a3cf6e13baa43cb4ed7 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 30 Apr 2020 14:51:37 -0400 Subject: [PATCH 01/12] update to new pep and piface formats --- pipeline_interface.yaml | 24 ++++++------------------ project/cfg.yaml | 9 +++++---- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/pipeline_interface.yaml b/pipeline_interface.yaml index 71e22e9..f9c863a 100644 --- a/pipeline_interface.yaml +++ b/pipeline_interface.yaml @@ -1,19 +1,7 @@ -protocol_mapping: - bedbuncher: bedbuncher +pipeline_name: BEDBUNCHER -pipelines: - bedbuncher: - name: bedbuncher - path: pipelines/bedbuncher.py - looper_args: True - arguments: - "--JSON-query-path": JSONquery_path - "--bedset-name": bedset_name - optional_arguments: - "--bedbase-config": bbconfig_path - resources: - default: - file_size: "0" - cores: "1" - mem: "12000" - time: "00-18:00:00" +sample_pipeline: + path: pipelines/bedbuncher.py + command_template: > + {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} + {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bbconfig_path} {% endif %} \ No newline at end of file diff --git a/project/cfg.yaml b/project/cfg.yaml index 923b292..3f5b6a2 100644 --- a/project/cfg.yaml +++ b/project/cfg.yaml @@ -1,7 +1,5 @@ +pep_version: 2.0.0 sample_table: bedset_query.csv -looper: - output_dir: . - pipeline_interfaces: ../pipeline_interface.yaml sample_modifiers: derive: @@ -10,4 +8,7 @@ sample_modifiers: source1: ../tests/{JSONquery_name}.json source2: ../tests/BEDset_output/ append: - protocol: "bedbuncher" \ No newline at end of file + pipeline_interfaces: ../pipeline_interface_new.yaml + +looper: + output_dir: . \ No newline at end of file From 72dcf4895ee93a325ef47f8dbbb35e40488be5b5 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 30 Apr 2020 18:49:09 -0400 Subject: [PATCH 02/12] add output directory to command template --- pipeline_interface.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline_interface.yaml b/pipeline_interface.yaml index f9c863a..2473b39 100644 --- a/pipeline_interface.yaml +++ b/pipeline_interface.yaml @@ -3,5 +3,5 @@ pipeline_name: BEDBUNCHER sample_pipeline: path: pipelines/bedbuncher.py command_template: > - {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} - {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bbconfig_path} {% endif %} \ No newline at end of file + {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} -O {looper.output_folder} + {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bbconfig_path} {% endif %} From bca64761b05807cdcaf74c6eae125a23d37746a7 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 1 May 2020 10:41:43 -0400 Subject: [PATCH 03/12] output_parent to output_folder --- pipelines/bedbuncher.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pipelines/bedbuncher.py b/pipelines/bedbuncher.py index 0d8104f..bf62265 100755 --- a/pipelines/bedbuncher.py +++ b/pipelines/bedbuncher.py @@ -38,7 +38,7 @@ # SET OUTPUT FOLDER # use output parent argument from looper to place pipeline stats (live separately from bedset results) -out_parent = args.output_parent +out_parent = args.output_folder bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config)) @@ -220,9 +220,8 @@ def main(): format(**cmd_vars) pm.run(cmd=command, target=json_file_path) - # Create a folder to place pipeline logs if we want to run a pipeline in the bedset - logs_name = "bedbuncher_pipeline_logs" + logs_name = "bedbuncher_pipeline_logs" logs_dir = os.path.abspath(os.path.join( bbc[CFG_PATH_KEY][CFG_BEDBUNCHER_OUTPUT_KEY], logs_name)) From 5fa2c2560d32d34a6c6ec781c35b6beeec1aa226 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 1 May 2020 16:59:11 -0400 Subject: [PATCH 04/12] add input schema in piface --- pipeline_interface.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/pipeline_interface.yaml b/pipeline_interface.yaml index 2473b39..db8162e 100644 --- a/pipeline_interface.yaml +++ b/pipeline_interface.yaml @@ -2,6 +2,7 @@ pipeline_name: BEDBUNCHER sample_pipeline: path: pipelines/bedbuncher.py + input_schema: http://schema.databio.org/pipelines/bedbuncher.yaml command_template: > {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} -O {looper.output_folder} {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bbconfig_path} {% endif %} From e1bc14ab9d94d50cc15d94429c8e4e4ebb280ab4 Mon Sep 17 00:00:00 2001 From: joseverdezoto Date: Mon, 11 May 2020 12:28:37 -0400 Subject: [PATCH 05/12] update bedset produced PEP to 2.0.0 --- pipelines/bedbuncher.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pipelines/bedbuncher.py b/pipelines/bedbuncher.py index bf62265..efd0f72 100755 --- a/pipelines/bedbuncher.py +++ b/pipelines/bedbuncher.py @@ -235,19 +235,23 @@ def main(): open(cfg_path, 'a').close() config_attamp = yacman.YacAttMap(filepath=cfg_path) with config_attamp as y: - y.metadata = {} - y.metadata.sample_table = bedset_annotation_sheet - y.metadata.output_dir = logs_dir + y.pep_version = {} + y.pep_version = "2.0.0" + y.sample_table = {} + y.sample_table = bedset_annotation_sheet + y.looper = {} + y.looper.output_dir = logs_dir y.iGD_db = {} y.iGD_db = os.path.join(igd_folder_name, args.bedset_name + ".igd") y.iGD_index = {} y.iGD_index = os.path.join(igd_folder_name, args.bedset_name + "_index.tsv") - y.constant_attributes = {} - y.constant_attributes.output_file_path = "source1" - y.derived_attributes = {} - y.derived_attributes = ["output_file_path"] - y.data_sources = {} - y.data_sources = {"source1": "{sample_name}.bed.gz"} + y.sample_modifiers = {} + y.sample_modifiers.append = {} + y.sample_modifiers.append.output_file_path = "source1" + y.sample_modifiers.derive = {} + y.sample_modifiers.derive.attributes = ["output_file_path"] + y.sample_modifiers.derive.sources = {} + y.sample_modifiers.derive.sources = {"source1": "{sample_name}.bed.gz"} # Create a tar archive using bed files original paths and bedset PEP tar_archive_file = os.path.abspath(os.path.join(output_folder, args.bedset_name + '.tar')) From 0b6ef42462e4d883b7b77b6b7fdccdf023f59532 Mon Sep 17 00:00:00 2001 From: joseverdezoto Date: Mon, 11 May 2020 18:09:41 -0400 Subject: [PATCH 06/12] remove automatic looper outfolder from pipeline args --- pipelines/bedbuncher.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pipelines/bedbuncher.py b/pipelines/bedbuncher.py index efd0f72..79a87b1 100755 --- a/pipelines/bedbuncher.py +++ b/pipelines/bedbuncher.py @@ -36,11 +36,19 @@ args = parser.parse_args() +# Initialize bbc object +bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config)) + # SET OUTPUT FOLDER -# use output parent argument from looper to place pipeline stats (live separately from bedset results) -out_parent = args.output_folder +# Create a folder to place pipeline logs +logs_name = "bedbuncher_pipeline_logs" +logs_dir = os.path.abspath(os.path.join( + bbc[CFG_PATH_KEY][CFG_BEDBUNCHER_OUTPUT_KEY], logs_name)) + +if not os.path.exists(logs_dir): + print("bedbuncher pipeline logs directory doesn't exist. Creating one...") + os.makedirs(logs_dir) -bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config)) def JSON_to_dict(file_name): @@ -79,7 +87,7 @@ def get_bedset_digest(sr): def main(): - pm = pypiper.PipelineManager(name="bedbuncher", outfolder=out_parent, args=args) + pm = pypiper.PipelineManager(name="bedbuncher", outfolder=logs_dir, args=args) # Establish Elasticsearch connection and check status using bbconf bbc.establish_elasticsearch_connection() @@ -220,14 +228,6 @@ def main(): format(**cmd_vars) pm.run(cmd=command, target=json_file_path) - # Create a folder to place pipeline logs if we want to run a pipeline in the bedset - logs_name = "bedbuncher_pipeline_logs" - logs_dir = os.path.abspath(os.path.join( - bbc[CFG_PATH_KEY][CFG_BEDBUNCHER_OUTPUT_KEY], logs_name)) - - if not os.path.exists(logs_dir): - print("bedbuncher pipeline logs directory doesn't exist. Creating one...") - os.makedirs(logs_dir) # create yaml config file for newly produced bedset # create an empty file to write the cfg to From 7443acb653e41de0a1c47fd4d3cae4a937974378 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 13 May 2020 15:46:57 -0400 Subject: [PATCH 07/12] update piface and example pep --- .gitignore | 78 +++++++++++++++++++++++++++++++++++++++++ pipeline_interface.yaml | 13 ++++--- project/cfg.yaml | 5 +-- 3 files changed, 87 insertions(+), 9 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b2fe827 --- /dev/null +++ b/.gitignore @@ -0,0 +1,78 @@ +# ignore test results +oldtests/test/* + +# toy/experimental files +*.pkl + +# ignore eggs +.eggs/ + +# generic ignore list: +*.lst + +# Compiled source +*.com +*.class +*.dll +*.exe +*.o +*.so +*.pyc + +# Packages +# it's better to unpack these files and commit the raw source +# git has its own built in compression methods +*.7z +*.dmg +*.gz +*.iso +*.jar +*.rar +*.tar +*.zip + +# Logs and databases +*.log +*.sql +*.sqlite + +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Gedit temporary files +*~ + +# libreoffice lock files: +.~lock* + +# Default-named test output +microtest/ +open_pipelines/ + +# IDE-specific items +.idea/ + +# pytest-related +.cache/ +.coverage* +.pytest_cache + +# Reserved files for comparison +*RESERVE* + +doc/ +build/ +dist/ +looper.egg-info/ +loopercli.egg-info/ +__pycache__/ + + +*ipynb_checkpoints* +hello_looper-master* diff --git a/pipeline_interface.yaml b/pipeline_interface.yaml index db8162e..83c2d69 100644 --- a/pipeline_interface.yaml +++ b/pipeline_interface.yaml @@ -1,8 +1,7 @@ pipeline_name: BEDBUNCHER - -sample_pipeline: - path: pipelines/bedbuncher.py - input_schema: http://schema.databio.org/pipelines/bedbuncher.yaml - command_template: > - {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} -O {looper.output_folder} - {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bbconfig_path} {% endif %} +pipeline_type: sample +path: pipelines/bedbuncher.py +input_schema: http://schema.databio.org/pipelines/bedbuncher.yaml +command_template: > + {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} -O {looper.output_folder} + {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bbconfig_path} {% endif %} \ No newline at end of file diff --git a/project/cfg.yaml b/project/cfg.yaml index 3f5b6a2..7b679c2 100644 --- a/project/cfg.yaml +++ b/project/cfg.yaml @@ -8,7 +8,8 @@ sample_modifiers: source1: ../tests/{JSONquery_name}.json source2: ../tests/BEDset_output/ append: - pipeline_interfaces: ../pipeline_interface_new.yaml + pipeline_interfaces: ../pipeline_interface.yaml looper: - output_dir: . \ No newline at end of file + run: + output_dir: $HOME/testing/bedstat \ No newline at end of file From 18562952222c156803f14f4e46856cb3241cf620 Mon Sep 17 00:00:00 2001 From: joseverdezoto Date: Fri, 22 May 2020 13:22:15 -0400 Subject: [PATCH 08/12] update bedset produced PEP 2.0 --- pipelines/bedbuncher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/bedbuncher.py b/pipelines/bedbuncher.py index 79a87b1..c3c36e6 100755 --- a/pipelines/bedbuncher.py +++ b/pipelines/bedbuncher.py @@ -240,7 +240,8 @@ def main(): y.sample_table = {} y.sample_table = bedset_annotation_sheet y.looper = {} - y.looper.output_dir = logs_dir + y.looper.run = {} + y.looper.run.output_dir = logs_dir y.iGD_db = {} y.iGD_db = os.path.join(igd_folder_name, args.bedset_name + ".igd") y.iGD_index = {} From de50080ee53fb5cfd4f1a06d950fb26e7134e9ce Mon Sep 17 00:00:00 2001 From: joseverdezoto Date: Fri, 22 May 2020 13:33:31 -0400 Subject: [PATCH 09/12] remove looper args from pipeline --- pipelines/bedbuncher.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipelines/bedbuncher.py b/pipelines/bedbuncher.py index c3c36e6..2dbde67 100755 --- a/pipelines/bedbuncher.py +++ b/pipelines/bedbuncher.py @@ -30,8 +30,7 @@ parser.add_argument("-n", "--bedset-name", help="name assigned to queried bedset", type=str) # add pypiper args to make pipeline looper compatible -parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "looper"], - required=["--JSON-query-path", "--bedset_name"]) +parser = pypiper.add_pypiper_args(parser, groups=["pypiper"], required=["--JSON-query-path", "--bedset_name"]) args = parser.parse_args() From cca10ad7c2de07178d029576d2d8892943c3b1bf Mon Sep 17 00:00:00 2001 From: joseverdezoto Date: Thu, 28 May 2020 11:26:52 -0400 Subject: [PATCH 10/12] edit bedset PEP looper section --- pipelines/bedbuncher.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipelines/bedbuncher.py b/pipelines/bedbuncher.py index 2dbde67..1553650 100755 --- a/pipelines/bedbuncher.py +++ b/pipelines/bedbuncher.py @@ -239,8 +239,7 @@ def main(): y.sample_table = {} y.sample_table = bedset_annotation_sheet y.looper = {} - y.looper.run = {} - y.looper.run.output_dir = logs_dir + y.looper.output_dir = logs_dir y.iGD_db = {} y.iGD_db = os.path.join(igd_folder_name, args.bedset_name + ".igd") y.iGD_index = {} From 08b806fbff02d3238720f05583f8c0abed821eb6 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 28 May 2020 15:14:10 -0400 Subject: [PATCH 11/12] Update pipeline_interface.yaml --- pipeline_interface.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipeline_interface.yaml b/pipeline_interface.yaml index 83c2d69..150ae91 100644 --- a/pipeline_interface.yaml +++ b/pipeline_interface.yaml @@ -3,5 +3,4 @@ pipeline_type: sample path: pipelines/bedbuncher.py input_schema: http://schema.databio.org/pipelines/bedbuncher.yaml command_template: > - {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} -O {looper.output_folder} - {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bbconfig_path} {% endif %} \ No newline at end of file + {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bedbase_config} {% endif %} From 72bb36b5ddf6f20a92c600eb753b04bbf68844af Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 28 May 2020 15:23:40 -0400 Subject: [PATCH 12/12] Update pipeline_interface.yaml --- pipeline_interface.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline_interface.yaml b/pipeline_interface.yaml index 150ae91..f158917 100644 --- a/pipeline_interface.yaml +++ b/pipeline_interface.yaml @@ -3,4 +3,4 @@ pipeline_type: sample path: pipelines/bedbuncher.py input_schema: http://schema.databio.org/pipelines/bedbuncher.yaml command_template: > - {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} {% if sample.bbconfig_path is defined %} --bedbase-config {sample.bedbase_config} {% endif %} + {pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} {% if sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %}