From 7a39c255fd713fa8d76d1e3ec53aced12764f096 Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Mon, 13 May 2024 14:09:12 +1200 Subject: [PATCH] Now removing comments from fasta file before feeding it to BRAKER --- CHANGELOG.md | 18 ++++++++++++++++++ conf/base.config | 10 +++++----- modules/kherronism/braker3/main.nf | 6 +++++- modules/kherronism/braker3/tests/name.only.sh | 4 ++++ modules/kherronism/braker3/tests/test.fa | 16 ++++++++++++++++ nextflow.config | 2 +- pfr_pangene | 2 +- version_check.sh | 2 +- 8 files changed, 51 insertions(+), 9 deletions(-) create mode 100755 modules/kherronism/braker3/tests/name.only.sh create mode 100644 modules/kherronism/braker3/tests/test.fa diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f49028..bb696d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 0.3.2 - [13-May-2024] + +### `Added` + +### `Fixed` + +1. Increased time limit for REPEATMODELER_REPEATMODELER to 5 days +2. Now removing comments from fasta file before feeding it to BRAKER; added tests for the perl one-liner +3. Fixed CHANGELOG version check failure in `version_check.sh` +4. Increased the SLURM job time limit to 14 days + +### `Dependencies` + +1. NextFlow!>=23.04.4 +2. 
nf-validation=1.1.3 + +### `Deprecated` + ## 0.3.1 - [10-May-2024] ### `Added` diff --git a/conf/base.config b/conf/base.config index 464584c..16824cd 100644 --- a/conf/base.config +++ b/conf/base.config @@ -66,21 +66,21 @@ process { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } - withName:REPEATMODELER_REPEATMODELER { - time = { check_max( 3.days * task.attempt, 'time' ) } + withName:EGGNOGMAPPER { + time = { check_max( 1.day * task.attempt, 'time' ) } } withName:REPEATMASKER { time = { check_max( 2.days * task.attempt, 'time' ) } } + withName:REPEATMODELER_REPEATMODELER { + time = { check_max( 5.days * task.attempt, 'time' ) } + } withName:EDTA_EDTA { time = { check_max( 7.days * task.attempt, 'time' ) } } withName:BRAKER3 { time = { check_max( 7.days * task.attempt, 'time' ) } } - withName:EGGNOGMAPPER { - time = { check_max( 1.day * task.attempt, 'time' ) } - } withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf index 82de764..9a15143 100644 --- a/modules/kherronism/braker3/main.nf +++ b/modules/kherronism/braker3/main.nf @@ -42,8 +42,12 @@ process BRAKER3 { """ cp -r /usr/share/augustus/config augustus_config + perl -p -e 's/^(>\\S+).*\$/\$1/' \\ + ${fasta} \\ + > ${prefix}.name.only.genome.masked.fasta + braker.pl \\ - --genome ${fasta} \\ + --genome ${prefix}.name.only.genome.masked.fasta \\ ${new_species} \\ --workingdir ${prefix} \\ --AUGUSTUS_CONFIG_PATH "\$(pwd)/augustus_config" \\ diff --git a/modules/kherronism/braker3/tests/name.only.sh b/modules/kherronism/braker3/tests/name.only.sh new file mode 100755 index 0000000..172e651 --- /dev/null +++ b/modules/kherronism/braker3/tests/name.only.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +perl -p -e 's/^(>\S+).*$/$1/' \ + modules/kherronism/braker3/tests/test.fa diff --git a/modules/kherronism/braker3/tests/test.fa b/modules/kherronism/braker3/tests/test.fa new file mode 100644 index 0000000..8b195b8 --- 
/dev/null +++ b/modules/kherronism/braker3/tests/test.fa @@ -0,0 +1,16 @@ +>chr1 This is with four spaces and a space and a tab +AAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAAC +CCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTA +>chr2 This is with four spaces +TAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAA +CCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCT +AAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAAC +>chr3 This is with a single space +TAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAA +CCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCT +>chrX +AACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACC +CTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAA +>chrY This desc is with tab and another tab | and a vertical slash +AACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACC +CTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAA diff --git a/nextflow.config b/nextflow.config index eec5b1a..bd4cccc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -58,7 +58,7 @@ manifest { description = """A NextFlow pipeline for pan-genome annotation""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.4' - version = '0.3.1' + version = '0.3.2' doi = '' } diff --git a/pfr_pangene b/pfr_pangene index 4f9bf4c..9b31b44 100644 --- a/pfr_pangene +++ b/pfr_pangene @@ -2,7 +2,7 @@ #SBATCH --job-name PANGENE -#SBATCH --time=7-00:00:00 +#SBATCH --time=14-00:00:00 #SBATCH --nodes=1 #SBATCH --ntasks=1 #SBATCH --cpus-per-task=1 diff --git a/version_check.sh b/version_check.sh index 131d97c..29b89ad 100755 --- a/version_check.sh +++ b/version_check.sh @@ -4,5 +4,5 @@ config_version=$(sed -n "s/.*version.*= '\(.*\)'.*/\1/p" nextflow.config) # Check CHANGELOG version -grep "## $config_version - " CHANGELOG.md >/dev/null \ +head -n 10 CHANGELOG.md | grep "## $config_version - " >/dev/null \ || (echo 'Failed to match CHANGELOG version'; exit 1)