From 841b2ecaaa915fc84bd9ba91e56b14d4df25ae50 Mon Sep 17 00:00:00 2001 From: Ivan Blagoev Topolsky Date: Mon, 18 Dec 2023 18:03:17 +0100 Subject: [PATCH] Secondary alignments confuse downstream - Minimap2 includes secondary alignment in its output (useful for debugging) - As spacing-saving feature these do not repeat the SEQ and QUAL from the primary alignment - This breaks COJAC and SmallGenomeUtilities --- config/config.html | 3 ++- workflow/envs/minimap_align.yaml | 4 ++-- workflow/rules/align.smk | 1 + workflow/schemas/config_schema.json | 6 ++++++ 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/config/config.html b/config/config.html index fd5e2403..10359a1e 100644 --- a/config/config.html +++ b/config/config.html @@ -142,6 +142,7 @@

Type: string Default: ""

Pass additional options to run ngshmmalign

V-pipe uses option -R <path/to/initial_reference>, thus option -r arg is not allowed. Also, instead of passing -l via the property extra, set leave_msa_temp to True. Lastly, please do not modify options -o arg, -w arg, -t arg, and -N arg. These are already managed by V-pipe.

Type: object Default: {}

Type: integer Default: 5000

Type: integer Default: 30

Type: string Default: "{VPIPE_BASEDIR}/envs/sam2bam.yaml"

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 235

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/bwa_QA.yaml"

Type: string Default: ""

Panel of diverse references against which to align reads as a QA step

Note: The virus-specific base configuration specified in general => virus_base_config will most likely change this option’s default.
You are still free to override that default in your configuration should the need arise.


Example:

resources/hiv/5-Virus-Mix.fasta
 

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 235

Type: string Default: "{VPIPE_BASEDIR}/envs/smallgenomeutilities.yaml"

Type: string Default: "HXB2:6614-6812,7109-7217,7376-7478,7601-7634"

Type: object Default: {}

This rule takes all previously aligned reads by hmm_align. Therefore, resources should be allocated accordingly.

Type: integer Default: 10000

Type: integer Default: 235

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/msa.yaml"

Type: object Default: {}

Type: integer Default: 8000

Type: integer Default: 235

Type: string Default: "{VPIPE_BASEDIR}/envs/smallgenomeutilities.yaml"

Type: object Default: {}

Type: integer Default: 2000

Type: integer Default: 235

Type: string Default: "{VPIPE_BASEDIR}/envs/bwa_align.yaml"

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 235

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/bwa_align.yaml"

Type: string Default: ""

With property extra, users can pass additional options to run BWA MEM. For more details on BWA MEM configurable options refer to the software documentation.

Type: object Default: {}

Type: integer Default: 2000

Type: integer Default: 235

Type: string Default: "{VPIPE_BASEDIR}/envs/bowtie_align.yaml"

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 235

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/bowtie_align.yaml"

Type: enum (of string) Default: "--phred33"

Indicate if qualities are Phred+33 (default) or Phred+64 (--phred64).

Must be one of:

  • "--phred33"
  • "--phred64"

Example:

--phred64
 

Type: string Default: "--local --sensitive-local"

Specify Bowtie 2 presets.

Type: integer

Type: string Default: ""

Pass additional options to run Bowtie 2. V-pipe handles the input and output files, as well as the reference sequence. Thus, do not modify these options.
For more details on Bowtie 2 configurable options refer to the software documentation.

Type: object Default: {}

Type: integer Default: 2000

Type: integer Default: 235

Type: string Default: "{VPIPE_BASEDIR}/envs/minimap_align.yaml"

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 235

Type: integer

Type: enum (of string) Default: "sr"

Specify minimap2 preset options. See minimap2’s documentation for details about each of the presets.

Must be one of:

  • "map-ont"
  • "map-hifi"
  • "map-pb"
  • "asm5"
  • "asm10"
  • "asm20"
  • "splice"
  • "splice:hq"
  • "sr"
  • "ava-pb"
  • "ava-ont"

Example:

map-ont
+

Type: boolean Default: false

By default V-pipe ignores Minimap2’s secondary alignment(s) and only considers the primary one (A secondary alignment occurs when a given read could align reasonably well to more than one place). This flag turns back on Minimap2’s secondary alignments, and also includes their sequences in the output BAM file.


Example:

True
 

Type: string Default: "{VPIPE_BASEDIR}/envs/minimap_align.yaml"

Type: string Default: ""

With property extra, users can pass additional options to run minimap2. For more details on minimap2 configurable options refer to the software documentation.

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 235

Type: string Default: "{VPIPE_BASEDIR}/envs/primerstrim.yaml"

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 235

Type: string Default: "{VPIPE_BASEDIR}/envs/smallgenomeutilities.yaml"

Type: integer Default: 50

Minimum read depth for reporting variants per locus.

Type: integer Default: 5

Read count below which ambiguous base ’n’ is reported.

Type: integer Default: 15

Minimum phred quality score for a base to be included.

Type: number Default: 0.05

Minimum frequency for an ambiguous nucleotide.

Value must be greater or equal to 0 and lesser or equal to 1

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 235

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/bcftools.yaml"

Type: integer Default: 10000

Type: integer Default: 10

Type: number Default: 0.05

Value must be greater or equal to 0 and lesser or equal to 1

Type: object Default: {}

Type: integer Default: 4096

Type: integer Default: 30

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/consseq_qa.yaml"

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 30

Type: string Default: "{VPIPE_BASEDIR}/envs/smallgenomeutilities.yaml"

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 30

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/smallgenomeutilities.yaml"

Type: object Default: {}

Type: integer Default: 1250

Type: integer Default: 30

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/smallgenomeutilities.yaml"

Type: object Default: {}

Type: integer Default: 1000

Type: integer Default: 235

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/smallgenomeutilities.yaml"

Type: integer Default: 100

Minimum read depth for reporting variants per locus.


Example:

50
 

Type: boolean Default: false

Output a numpy array file containing frequencies of all bases, including gaps and also the most abundant base across samples.


Example:

True
 

Type: object Default: {}

Type: integer Default: 1000

Type: integer Default: 30

Type: integer

Type: string Default: "{VPIPE_BASEDIR}/envs/smallgenomeutilities.yaml"

Type: boolean Default: false

Construct intervals based on overlapping windows of the read alignment. By default, regions with high coverage are built based on the position-wise read depth.


Example:

True
@@ -184,4 +185,4 @@
 

Type: boolean Default: false

Generate checksum for each individual consensus sequence (if a consensus is regenerated, it will help determine whether the new file has changed content or is virtually the same as the previous).


Example:

True
 

Type: boolean Default: false

Also include the original .fastq.gz sequencing reads files from raw_data/ in the list of files to be uploaded. See property orig_cram below for a compressed version and see output dehumanized_raw_reads and section dehuman for depleting reads from the host.


Example:

True
 

Type: boolean Default: false

Also include a compressed version of the original sequencing raw reads files from raw_data/. Similar to property orig_fastq above, but with reference-based compression.


Example:

True
-

Type: string Default: "{VPIPE_BASEDIR}/scripts/prepare_upload_symlinks.sh"

Custom script that assists and prepares uploads.

It will receive the following positional parameters:

  • <OUTPUT>: the output file that must be created by the script.
  • <SAMPLE_ID>: a string (with no path separator slashes) that can be used as a name, uniquely identifying the sample and the date.
  • <SAMPLE_DIR>: the base directory of the sample.
  • <UPLOAD_FILES>…: a list of files to consider for upload

For an example, see the default script prepare_upload_symlinks.sh, it generates symlinks that help tracking which samples are new and/or updated between runs of V-pipe and thus should be considered for upload.

Type: string Default: ""

Named options to be passed to the script, before the positional parameters. E.g. for an extra configuration file with SFTP server information.

\ No newline at end of file +

Type: string Default: "{VPIPE_BASEDIR}/scripts/prepare_upload_symlinks.sh"

Custom script that assists and prepares uploads.

It will receive the following positional parameters:

  • <OUTPUT>: the output file that must be created by the script.
  • <SAMPLE_ID>: a string (with no path separator slashes) that can be used as a name, uniquely identifying the sample and the date.
  • <SAMPLE_DIR>: the base directory of the sample.
  • <UPLOAD_FILES>…: a list of files to consider for upload

For an example, see the default script prepare_upload_symlinks.sh, it generates symlinks that help tracking which samples are new and/or updated between runs of V-pipe and thus should be considered for upload.

Type: string Default: ""

Named options to be passed to the script, before the positional parameters. E.g. for an extra configuration file with SFTP server information.

\ No newline at end of file diff --git a/workflow/envs/minimap_align.yaml b/workflow/envs/minimap_align.yaml index 984117d6..c09b28e1 100644 --- a/workflow/envs/minimap_align.yaml +++ b/workflow/envs/minimap_align.yaml @@ -3,5 +3,5 @@ channels: - bioconda - defaults dependencies: - - minimap2=2.24 - - samtools=1.10 + - minimap2=2.26 + - samtools=1.19 diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index 0494239f..1c02520e 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -667,6 +667,7 @@ elif config.general["aligner"] == "minimap": SEED="--seed 42", EXTRA=config.minimap_align["extra"], PRESET=config.minimap_align["preset"], + SECONDARY="--secondary=yes --secondary-seq" if config.minimap_align["secondary"] else "--secondary=no", FILTER="-f 2" if config.input["paired"] else "-F 4", MINIMAP=config.applications["minimap"], SAMTOOLS=config.applications["samtools"], diff --git a/workflow/schemas/config_schema.json b/workflow/schemas/config_schema.json index ac554423..648e5ef6 100644 --- a/workflow/schemas/config_schema.json +++ b/workflow/schemas/config_schema.json @@ -835,6 +835,12 @@ "description": "Specify minimap2 preset options. See [minimape's documentation](https://lh3.github.io/minimap2/minimap2.html#8) for details about each of the presets.", "examples": ["map-ont"] }, + "secondary": { + "type": "boolean", + "default": false, + "description": "By default V-pipe ignores Minimap2's secondary alignment(s) and only considers the primary one (A secondary alignment occurs when a given read could align reasonably well to more than one place). This flag turns back on Minimap2's secondary alignments, and also includes their sequences in the output BAM file.", + "examples": [ true ] + }, "conda": { "type": "string", "default": "{VPIPE_BASEDIR}/envs/minimap_align.yaml"