Skip to content
This repository has been archived by the owner on Dec 19, 2023. It is now read-only.

Commit

Permalink
Merge pull request #20 from databio/cfg2
Browse files Browse the repository at this point in the history
Cfg2
  • Loading branch information
stolarczyk authored May 28, 2020
2 parents a95fad5 + 72bb36b commit 0d9a4e4
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 47 deletions.
78 changes: 78 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# ignore test results
oldtests/test/*

# toy/experimental files
*.pkl

# ignore eggs
.eggs/

# generic ignore list:
*.lst

# Compiled source
*.com
*.class
*.dll
*.exe
*.o
*.so
*.pyc

# Packages
# It's better to unpack these files and commit the raw source;
# Git has its own built-in compression methods.
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip

# Logs and databases
*.log
*.sql
*.sqlite

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Gedit temporary files
*~

# LibreOffice lock files:
.~lock*

# Default-named test output
microtest/
open_pipelines/

# IDE-specific items
.idea/

# pytest-related
.cache/
.coverage*
.pytest_cache

# Reserved files for comparison
*RESERVE*

doc/
build/
dist/
looper.egg-info/
loopercli.egg-info/
__pycache__/


*ipynb_checkpoints*
hello_looper-master*
25 changes: 6 additions & 19 deletions pipeline_interface.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
protocol_mapping:
bedbuncher: bedbuncher

pipelines:
bedbuncher:
name: bedbuncher
path: pipelines/bedbuncher.py
looper_args: True
arguments:
"--JSON-query-path": JSONquery_path
"--bedset-name": bedset_name
optional_arguments:
"--bedbase-config": bbconfig_path
resources:
default:
file_size: "0"
cores: "1"
mem: "12000"
time: "00-18:00:00"
pipeline_name: BEDBUNCHER
pipeline_type: sample
path: pipelines/bedbuncher.py
input_schema: http://schema.databio.org/pipelines/bedbuncher.yaml
command_template: >
{pipeline.path} --JSON-query-path {sample.JSONquery_path} --bedset-name {sample.bedset_name} {% if sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %}
50 changes: 26 additions & 24 deletions pipelines/bedbuncher.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,24 @@
parser.add_argument("-n", "--bedset-name", help="name assigned to queried bedset", type=str)

# add pypiper args to make pipeline looper compatible
parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "looper"],
required=["--JSON-query-path", "--bedset_name"])
parser = pypiper.add_pypiper_args(parser, groups=["pypiper"], required=["--JSON-query-path", "--bedset_name"])


args = parser.parse_args()

# Initialize bbc object
bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config))

# SET OUTPUT FOLDER
# use the output parent argument from looper to place pipeline stats (these live separately from the bedset results)
out_parent = args.output_parent
# Create a folder to place pipeline logs
logs_name = "bedbuncher_pipeline_logs"
logs_dir = os.path.abspath(os.path.join(
bbc[CFG_PATH_KEY][CFG_BEDBUNCHER_OUTPUT_KEY], logs_name))

if not os.path.exists(logs_dir):
print("bedbuncher pipeline logs directory doesn't exist. Creating one...")
os.makedirs(logs_dir)

bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config))


def JSON_to_dict(file_name):
Expand Down Expand Up @@ -79,7 +86,7 @@ def get_bedset_digest(sr):


def main():
pm = pypiper.PipelineManager(name="bedbuncher", outfolder=out_parent, args=args)
pm = pypiper.PipelineManager(name="bedbuncher", outfolder=logs_dir, args=args)

# Establish Elasticsearch connection and check status using bbconf
bbc.establish_elasticsearch_connection()
Expand Down Expand Up @@ -220,35 +227,30 @@ def main():
format(**cmd_vars)
pm.run(cmd=command, target=json_file_path)


# Create a folder to hold pipeline logs in case we want to run a pipeline on the bedset
logs_name = "bedbuncher_pipeline_logs"
logs_dir = os.path.abspath(os.path.join(
bbc[CFG_PATH_KEY][CFG_BEDBUNCHER_OUTPUT_KEY], logs_name))

if not os.path.exists(logs_dir):
print("bedbuncher pipeline logs directory doesn't exist. Creating one...")
os.makedirs(logs_dir)

# create yaml config file for newly produced bedset
# create an empty file to write the cfg to
cfg_path = os.path.join(pep_folder_path, args.bedset_name + "_cfg.yaml")
open(cfg_path, 'a').close()
config_attamp = yacman.YacAttMap(filepath=cfg_path)
with config_attamp as y:
y.metadata = {}
y.metadata.sample_table = bedset_annotation_sheet
y.metadata.output_dir = logs_dir
y.pep_version = {}
y.pep_version = "2.0.0"
y.sample_table = {}
y.sample_table = bedset_annotation_sheet
y.looper = {}
y.looper.output_dir = logs_dir
y.iGD_db = {}
y.iGD_db = os.path.join(igd_folder_name, args.bedset_name + ".igd")
y.iGD_index = {}
y.iGD_index = os.path.join(igd_folder_name, args.bedset_name + "_index.tsv")
y.constant_attributes = {}
y.constant_attributes.output_file_path = "source1"
y.derived_attributes = {}
y.derived_attributes = ["output_file_path"]
y.data_sources = {}
y.data_sources = {"source1": "{sample_name}.bed.gz"}
y.sample_modifiers = {}
y.sample_modifiers.append = {}
y.sample_modifiers.append.output_file_path = "source1"
y.sample_modifiers.derive = {}
y.sample_modifiers.derive.attributes = ["output_file_path"]
y.sample_modifiers.derive.sources = {}
y.sample_modifiers.derive.sources = {"source1": "{sample_name}.bed.gz"}

# Create a tar archive using bed files original paths and bedset PEP
tar_archive_file = os.path.abspath(os.path.join(output_folder, args.bedset_name + '.tar'))
Expand Down
10 changes: 6 additions & 4 deletions project/cfg.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
pep_version: 2.0.0
sample_table: bedset_query.csv
looper:
output_dir: .
pipeline_interfaces: ../pipeline_interface.yaml

sample_modifiers:
derive:
Expand All @@ -10,4 +8,8 @@ sample_modifiers:
source1: ../tests/{JSONquery_name}.json
source2: ../tests/BEDset_output/
append:
protocol: "bedbuncher"
pipeline_interfaces: ../pipeline_interface.yaml

looper:
run:
output_dir: $HOME/testing/bedstat

0 comments on commit 0d9a4e4

Please sign in to comment.