diff --git a/README.md b/README.md index 868f1616a..81bd7fcf8 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ bash vip/install.sh ### Usage ```bash usage: vip -w -i -o - -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf + -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf, pod5 -i, --input path to sample sheet .tsv -o, --output output folder -c, --config path to additional nextflow .cfg (optional) @@ -39,5 +39,41 @@ pip install mkdocs mkdocs-mermaid2-plugin mkdocs serve ``` +## Proof of Concept - Methylation +All the files and directories that are adapted or added for the support of base modification and POD5 data +``` +config/nxf_pod5.config +config/nxf_vcf.config +docs/ +modules/pod5/ +modules/vcf/report.nf +modules/vcf/templates/report.sh +resources/pod5/ +test/suites/pod5/ +utils/build.sh +vip_pod5.nf +vip_vcf.nf +vip.sh +install.sh +``` + +## How to install VIP and test this branch +``` +# Clone repository and switch to PoC/Methylation branch +git clone https://github.com/molgenis/vip.git +cd vip +git checkout PoC/Methylation + +# Install to download tools +bash install.sh + +# Test the pod5 workflow +cd test +ml awscli +bash test.sh -t pod5 + +# Output can be found in test/output/ +``` + ### License VIP is an aggregate work of many works, each covered by their own licence(s). For the purposes of determining what you can do with specific works in VIP, this policy should be read together with the licence(s) of the relevant tools. For the avoidance of doubt, where any other licence grants rights, this policy does not modify or reduce those rights under those licences. 
diff --git a/config/nxf_pod5.config b/config/nxf_pod5.config new file mode 100644 index 000000000..656a0f626 --- /dev/null +++ b/config/nxf_pod5.config @@ -0,0 +1,39 @@ +includeConfig 'nxf.config' +includeConfig 'nxf_cram.config' + +// Environmental commands +env { + CMD_DORADO = "apptainer exec --nv --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/dorado-shac28cd94f2303b0493a4b16ca86e711852c2b8525.sif dorado" + CMD_MODKIT = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/modkit-sha3745cd8f97213eaf908f5fbf4f2f8b8e2cedfc30.sif modkit" +} + +// Process how to execute +process { + withLabel: 'dorado'{ + executor = 'slurm' + memory = '40GB' + time = '10h' + cpus = 20 + clusterOptions = '--gres=gpu:a40:1 --qos=priority' + } + + withLabel: 'sort_bam'{ + executor = 'slurm' + memory = '10GB' + time = '10h' + cpus = 10 + } + + withLabel: 'modkit'{ + executor = 'slurm' + memory = '30GB' + time = '10h' + cpus = 5 + } +} + +// Parameters used in workflow pod5 +params { + dorado_model = "${projectDir}/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/" +} + diff --git a/config/nxf_vcf.config b/config/nxf_vcf.config index 4a6f23e8f..9b67f653d 100644 --- a/config/nxf_vcf.config +++ b/config/nxf_vcf.config @@ -37,7 +37,7 @@ process { } withLabel: 'vcf_report' { - memory = '4GB' + memory = '100GB' } } @@ -106,9 +106,11 @@ params { report { include_crams = true + include_bedmethyls = true max_records = "" max_samples = "" - template = "" + template = "${projectDir}/resources/pod5/pod5_template.html" + vcf_report_jar = "${projectDir}/resources/pod5/pod5-vcf-report.jar" GRCh38 { genes = "${projectDir}/resources/GRCh38/GCF_000001405.39_GRCh38.p13_genomic_mapped.gff.gz" diff --git a/docs/about/acknowledgements.md b/docs/about/acknowledgements.md index e5383fa1b..01cdc8918 100644 --- a/docs/about/acknowledgements.md +++ b/docs/about/acknowledgements.md @@ -23,4 +23,6 @@ Standing on the shoulders of giants. 
This project could not have possible withou - [cuteSV](https://github.com/tjiangHIT/cuteSV) - [Straglr](https://github.com/philres/straglr) - [Stranger](https://github.com/Clinical-Genomics/stranger) -- [fastp](https://github.com/OpenGene/fastp) \ No newline at end of file +- [fastp](https://github.com/OpenGene/fastp) +- [Dorado](https://github.com/nanoporetech/dorado) +- [Modkit](https://github.com/nanoporetech/modkit) \ No newline at end of file diff --git a/docs/examples/pod5.md b/docs/examples/pod5.md new file mode 100644 index 000000000..04cf9e2e0 --- /dev/null +++ b/docs/examples/pod5.md @@ -0,0 +1,18 @@ +# POD5 +To run vip with POD5 data, just specify the POD5 paths in your sample sheet. + +## Samplesheet +See an example for the samplesheet below, the example shows the samplesheet for a run starting from the `pod5`. + +``` +individual_id pod5 +your_sample_id path/to/your/data_1.pod5,path/to/your/data_2.pod5 +``` + +## Run the pipeline +```bash +cd vip +vip --workflow pod5 --input path/to/samplesheet.tsv --output path/to/output/folder +``` + +For an example on how to execute the `pod5` workflow see [here](https://github.com/molgenis/vip/blob/229fc8c6d01bfb9e0dcdfee85d6e903b31f71f7a/test/suites/pod5/hg001_giab_2023.05.sh#L16C1-L16C28) \ No newline at end of file diff --git a/docs/get_started/requirements.md b/docs/get_started/requirements.md index 6cd9e7d88..e5e9f3a02 100644 --- a/docs/get_started/requirements.md +++ b/docs/get_started/requirements.md @@ -5,7 +5,7 @@ Before installing VIP please check whether your system meets the following requi - Bash ≥ 3.2 - Java ≥ 11 - [Apptainer](https://apptainer.org/docs/admin/main/installation.html#install-from-pre-built-packages) (setuid installation) -- 8GB RAM 1 +- 100GB RAM 1 - 150GB disk space 1) The memory requirements differ per workflow and depend, on the size of your input data, the scheduler that you use, the amount of parallelization. 
For example, executing VIP using a job scheduler will reduce the memory requirements on the system submitting the jobs to 1-2GB. diff --git a/docs/home/key_features.md b/docs/home/key_features.md index c9137f45d..1c84866f1 100644 --- a/docs/home/key_features.md +++ b/docs/home/key_features.md @@ -2,9 +2,11 @@ VIP is an easy to install, easy to use, portable and flexible pipeline implemented using [Nextflow](https://www.nextflow.io/). Features include: -- Workflows for a broad range of input file types: `bam`, `cram`, `fastq`, `g.vcf`, `vcf` +- Workflows for a broad range of input file types: `pod5`, `bam`, `cram`, `fastq`, `g.vcf`, `vcf` - Produces stand-alone variant interpretation HTML report with integrated genome browser - Long-read sequencing support (Oxford Nanopore, PacBio HiFi) +- Supports base modification in `cram` files with methylation tags: [SAMtags](https://samtools.github.io/hts-specs/SAMtags.pdf) +- Supports bedmethyl visualisation in genome browser - Short-read sequencing support (Illumina, both single and paired-end reads) - Supports GRCh38, supports GRCh37 and T2T via liftover - Short variant detection diff --git a/docs/index.md b/docs/index.md index 203736a1b..722513f9e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,7 +1,7 @@ # Variant Interpretation Pipeline (VIP) VIP is a flexible human variant interpretation pipeline for rare disease using state-of-the-art pathogenicity prediction ([CAPICE](https://github.com/molgenis/capice)) and template-based interactive reporting to facilitate decision support. 
-The VIP pipeline can be used starting from either your `fastq`, `bam/cram` or `.g.vcf/vcf` data, +The VIP pipeline can be used starting from either your `pod5`, `fastq`, `bam/cram` or `.g.vcf/vcf` data, every entry point will result in a `vcf` file with your annotated, classified and filtered variants as well as a interactive HTML report with the same variants, prioritized by the CAPICE pathogenicity score and providing additional aids like a genome browser and a representation of the decisions leading to the VIP classification. @@ -9,7 +9,7 @@ VIP can be used for single patients, families or cohort data. [Click here for a live example](vip_giab_hg001.html) -![Example report](img/report_example.png)] +![Example report](img/report_example.png) *Above: report example* diff --git a/docs/usage/command-line-options.md b/docs/usage/command-line-options.md index 828ca23f2..9cb53a159 100644 --- a/docs/usage/command-line-options.md +++ b/docs/usage/command-line-options.md @@ -7,7 +7,7 @@ In addition to the `.vcf.gz` an interactive `.html` report is produced that can ``` usage: vip -w -i -o - -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf + -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf, pod5 -i, --input path to sample sheet .tsv -o, --output output folder -c, --config path to additional nextflow .cfg (optional) @@ -30,6 +30,7 @@ usage: vip -w -i -o By default `vip`: - Assumes an Illumina sequencing platform was used to generate the input data +- Assumes Nanopore sequencing was used to generate input data for `pod5` workflow - Assumes whole-genome sequencing (WGS) method was used to generate the input data - Uses a GRCh38 reference genome ([GCA_000001405.15 / GCF_000001405.26](https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26/)) - Provides classification trees for default variant filtration. 
For details, see [here](../advanced/classification_trees.md) diff --git a/docs/usage/config.md b/docs/usage/config.md index 043364420..e2c4c6177 100644 --- a/docs/usage/config.md +++ b/docs/usage/config.md @@ -22,6 +22,11 @@ An additional configuration file can be supplied on the command-line to overwrit **Warning:** Please take note of the fact that for a different reference fasta.gz the unzipped referenfasta file is also required. Both the zipped and unzipped fasta should have an index. +### POD5 +| key | default | description | +|---------------------------|-------------|--------------------------------------------------------------------------------------------------------| +| dorado_model | *installed* | for details, see [here](https://github.com/nanoporetech/dorado) | + ### FASTQ | key | default | description | |---------------------------|-------------|--------------------------------------------------------------------------------------------------------| diff --git a/docs/usage/input.md b/docs/usage/input.md index 3eb6688ee..9ce06c480 100644 --- a/docs/usage/input.md +++ b/docs/usage/input.md @@ -38,6 +38,11 @@ The following sections describe the columns that can be used in every sample-she 1 Exception: if no probands are defined in the sample-sheet then all samples are considered to be probands. 
+## Columns: POD5 +| column | type | required | default | description | +|-------------------------|----------|----------|--------------|-------------------------------------------------------------------------------------------------------------| +| ``pod5`` | ``file`` | yes | | allowed file extensions: ``pod5`` | + ## Columns: FASTQ | column | type | required | default | description | |-------------------------|---------------|-----------------|--------------|-------------------------------------------------------------------------------------------------------------| @@ -68,3 +73,4 @@ The following sections describe the columns that can be used in every sample-she | ``assembly`` | ``enum`` | | ``GRCh38`` | allowed values: [``GRCh37``, ``GRCh38``, ``T2T``], value must be the same for all project samples | | ``vcf`` | ``file`` | yes | | allowed file extensions: [``vcf``, ``vcf.gz``, ``vcf.bgz``, ``bcf``, ``bcf.gz``, ``bcf.bgz``], value must be the same for all project samples | | ``cram`` | ``file`` | | | allowed file extensions: [``bam``, ``cram``, ``sam``] | +| ``bedmethyl``| ``file`` | | | allowed file extensions: ``bedmethyl`` | diff --git a/docs/usage/workflow.md b/docs/usage/workflow.md index 0f24ee2a0..87fdfbbe0 100644 --- a/docs/usage/workflow.md +++ b/docs/usage/workflow.md @@ -1,8 +1,19 @@ # Workflow -VIP consists of four workflows depending on the type of input data: fastq, bam/cram, gvcf or vcf. -The `fastq` workflow is an extension of the `cram` workflow. The `cram` and `gvcf` workflows are extensions of the `vcf` workflow. +VIP consists of five workflows depending on the type of input data: pod5, fastq, bam/cram, gvcf or vcf. +The `fastq` and `pod5` workflows are extensions of the `cram` workflow. The `cram` and `gvcf` workflows are extensions of the `vcf` workflow. The `vcf` workflow produces the pipeline outputs as described [here](./output.md). -The following sections provide an overview of the steps of each of these workflows. 
+The following sections provide an overview of the steps of each of these workflows. + +## POD5 +The `pod5` workflow consists of the following steps: + +1. Parallelize sample sheet per sample and for each sample +2. Perform modified basecalling and alignment using [Dorado](https://github.com/nanoporetech/dorado) producing a `bam` file per sample +3. Sort the `bam` file per sample and create an index and stats file using [Samtools](http://samtools.github.io/) +4. Perform pileup with [Modkit](https://github.com/nanoporetech/modkit) to construct a bedMethyl table per sample +5. Continue with step 3. of the `cram` workflow + +For details, see [here](https://github.com/molgenis/vip/blob/main/vip_pod5.nf). ## FASTQ The `fastq` workflow consists of the following steps: @@ -24,7 +35,7 @@ The `cram` workflow consists of the following steps: 1. Using [ExpansionHunter](https://github.com/Illumina/ExpansionHunter) for Illumina short read data. 2. Using this [fork of Straglr](https://github.com/philres/straglr) for PacBio and Nanopore long read data, this fork is chosen over the original [Straglr](https://github.com/bcgsc/straglr) because of the VCF output that enables VIP to combine it with the SV and SNV data in the VCF workflow. 4. Parallelize cram in chunks consisting of one or more contigs and for each chunk - 1. Perform short variant calling with [DeepVariant](https://github.com/google/deepvariant) producing a `gvcf` file per chunk per sample, the gvcfs of the samples in a project are than merged to one vcf per project (using [GLnexus](https://github.com/dnanexus-rnd/GLnexus). + 1. Perform short variant calling with [DeepVariant](https://github.com/google/deepvariant) producing a `gvcf` file per chunk per sample, the gvcfs of the samples in a project are then merged to one vcf per project (using [GLnexus](https://github.com/dnanexus-rnd/GLnexus)). 2. 
Perform structural variant calling with [Manta](https://github.com/Illumina/manta) or [cuteSV](https://github.com/tjiangHIT/cuteSV) producing a `vcf` file per chunk per project. 5. Concatenate short variant calling and structural variant calling `vcf` files per chunk per sample 6. Continue with step 3. of the `vcf` workflow diff --git a/install.sh b/install.sh index 4c6ffa483..4bdcb44cb 100755 --- a/install.sh +++ b/install.sh @@ -74,12 +74,14 @@ download_files() { urls+=("c7655e4ffce0178a1a0dcc0ed097cd8f" "images/cutesv-2.0.3.sif") urls+=("8efa3c0f6c0f5378ca22d16074f50dfe" "images/deepvariant-1.6.0.sif") urls+=("b67e8c1d774c0d22de70b7be79aaa05e" "images/deepvariant_deeptrio-1.6.0.sif") + urls+=("8d7a34c469bbd1d27c324a867713cd4b" "images/dorado-shac28cd94f2303b0493a4b16ca86e711852c2b8525.sif") urls+=("78a8ce16c9d8bac53e5fbca4f763dcef" "images/expansionhunter-5.0.0.sif") urls+=("afed919dc16ccdae1869cf6dbc5a19d5" "images/fastp-0.23.4.sif") urls+=("494c8c9e1031828f48027e34032de423" "images/gado-1.0.3.sif") urls+=("d25ba2124ef883b1b6f7a2eff2cb8201" "images/glnexus_v1.4.5-patched.sif") urls+=("ff8aceb2c9f185307a69b981ba08efd8" "images/manta-1.6.0.sif") urls+=("1e0caddbdd755bf608ef024e3d0a2f19" "images/minimap2-2.26.sif") + urls+=("7422915ce79a9dc120cb82fa4f2c06dd" "images/modkit-sha3745cd8f97213eaf908f5fbf4f2f8b8e2cedfc30.sif") urls+=("06ac8a76a307fa42fffd80ab906fd24b" "images/picard-3.1.1.sif") urls+=("9a4b685b26744113d3ea0a3904c02706" "images/samtools-1.17-patch1.sif") urls+=("2c18fcda2660792a7c8ba390463ae7ac" "images/straglr-philres-1.4.2.sif") diff --git a/modules/pod5/dorado.nf b/modules/pod5/dorado.nf new file mode 100644 index 000000000..f314173fd --- /dev/null +++ b/modules/pod5/dorado.nf @@ -0,0 +1,19 @@ +process dorado { + // Basecall pod5 files using Dorado + label 'dorado' + publishDir "$params.output/intermediates", mode: 'link' + + input: + tuple val(meta), path(pod5) + + output: + tuple val(meta), path(bam) + + shell: + 
reference=params[params.assembly].reference.fasta + bam="${meta.project.id}_${meta.sample.family_id}_${meta.sample.individual_id}.bam" + + template "dorado.sh" + + +} \ No newline at end of file diff --git a/modules/pod5/modkit.nf b/modules/pod5/modkit.nf new file mode 100644 index 000000000..13f1c5b4c --- /dev/null +++ b/modules/pod5/modkit.nf @@ -0,0 +1,25 @@ +process modkit { + // Proccess bam files using Modkit tool + + label 'modkit' + publishDir "$params.output/intermediates", mode: 'link' + + input: + tuple val(meta), path(sorted_bam), path(sorted_bam_index) + + output: + tuple val(meta), path(bedmethyl) + + shell: + refSeqPath = params[params.assembly].reference.fasta + reference = refSeqPath.substring(0, refSeqPath.lastIndexOf('.')) + name = "${meta.project.id}_${meta.sample.family_id}_${meta.sample.individual_id}" + bedmethyl = "${name}.bedmethyl" + converted_bam = "${name}_converted.bam" + converted_bam_index = "${name}_converted.bam.csi" + summary_modkit = "${name}_summary_modkit.txt" + log_modkit = "${name}_modkit.log" + + template 'modkit.sh' + +} \ No newline at end of file diff --git a/modules/pod5/samtools.nf b/modules/pod5/samtools.nf new file mode 100644 index 000000000..4ff800b76 --- /dev/null +++ b/modules/pod5/samtools.nf @@ -0,0 +1,19 @@ +process sort_bam { + // Sort bam files using SAMTools + label "sort_bam" + publishDir "$params.output/intermediates", mode: 'link' + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path(sortedBam), path(sortedBamIndex), path(sortedBamStats) + + shell: + sortedBam="${meta.project.id}_${meta.sample.family_id}_${meta.sample.individual_id}_sorted.bam" + sortedBamIndex="${meta.project.id}_${meta.sample.family_id}_${meta.sample.individual_id}_sorted.bam.csi" + sortedBamStats="${meta.project.id}_${meta.sample.family_id}_${meta.sample.individual_id}_sorted.bam.stats" + + template 'samtools.sh' + +} \ No newline at end of file diff --git a/modules/pod5/templates/dorado.sh 
b/modules/pod5/templates/dorado.sh new file mode 100644 index 000000000..e32f6edc4 --- /dev/null +++ b/modules/pod5/templates/dorado.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -euo pipefail + +mod_basecaller() { + # Command for Dorado tool + echo "working" + ${CMD_DORADO} basecaller !{params.dorado_model} ./ --modified-bases 5mCG_5hmCG --reference !{reference} > !{bam} +} + +main() { + mod_basecaller +} + +main "$@" \ No newline at end of file diff --git a/modules/pod5/templates/modkit.sh b/modules/pod5/templates/modkit.sh new file mode 100644 index 000000000..33cf3f812 --- /dev/null +++ b/modules/pod5/templates/modkit.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -euo pipefail + +summary() { + # Use modkit tool to summarize bam files + ${CMD_MODKIT} summary !{sorted_bam} > !{summary_modkit} +} + +adjust_mod() { + ${CMD_MODKIT} adjust-mods !{sorted_bam} !{converted_bam} --convert h m + ${CMD_SAMTOOLS} index -c !{converted_bam} +} + +pileup() { + # Use modkit tool to process bam to bedmethyl file + ${CMD_MODKIT} pileup !{converted_bam} !{bedmethyl} --cpg --ref !{reference} --only-tabs --log-filepath !{log_modkit} +} + +main() { + summary + adjust_mod + pileup +} + +main "$@" \ No newline at end of file diff --git a/modules/pod5/templates/samtools.sh b/modules/pod5/templates/samtools.sh new file mode 100644 index 000000000..cd1e35f5e --- /dev/null +++ b/modules/pod5/templates/samtools.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -euo pipefail + +sort() { + # Use samtools to sort bam + ${CMD_SAMTOOLS} sort --no-PG -u -o !{sortedBam} !{bam} --write-index +} + +stats() { + ${CMD_SAMTOOLS} idxstats "!{sortedBam}" > "!{sortedBamStats}" +} + +main() { + sort + stats +} + +main "$@" \ No newline at end of file diff --git a/modules/vcf/report.nf b/modules/vcf/report.nf index 4910545a8..1cc7aef2a 100644 --- a/modules/vcf/report.nf +++ b/modules/vcf/report.nf @@ -7,7 +7,7 @@ process report { publishDir "$params.output", mode: 'link' input: - tuple val(meta), path(vcf), path(vcfIndex), path(crams) 
+ tuple val(meta), path(vcf), path(vcfIndex), path(crams), path(bedmethyls) output: tuple val(meta), path(vcfOut), path(vcfOutIndex), path(reportPath) @@ -26,8 +26,11 @@ process report { maxSamples = params.vcf.report.max_samples genesPath = params.vcf.report[meta.project.assembly].genes template = params.vcf.report.template + vcfReportJar = params.vcf.report.vcf_report_jar crams = meta.crams ? meta.crams.collect { "${it.individual_id}=${it.cram}" }.join(",") : "" + bedmethyls = meta.bedmethyls ? meta.bedmethyls.findAll { it.bedmethyl }.collect { "${it.individual_id}=${it.bedmethyl}" }.join(",") : "" includeCrams = params.vcf.report.include_crams + includeBedMethyls = params.vcf.report.include_bedmethyls probands = meta.probands.collect{ proband -> proband.individual_id }.join(",") hpoIds = meta.project.samples.findAll{ sample -> !sample.hpo_ids.isEmpty() }.collect{ sample -> [sample.individual_id, sample.hpo_ids.join(";")].join("/") }.join(",") diff --git a/modules/vcf/templates/report.sh b/modules/vcf/templates/report.sh index 490644d86..3f9026a9d 100644 --- a/modules/vcf/templates/report.sh +++ b/modules/vcf/templates/report.sh @@ -28,7 +28,7 @@ report() { args+=("-Djava.io.tmpdir=\"${TMPDIR}\"") args+=("-XX:ParallelGCThreads=2") args+=("-Xmx!{task.memory.toMega() - 256}m") - args+=("-jar" "/opt/vcf-report/lib/vcf-report.jar") + args+=("-jar" "!{vcfReportJar}") args+=("--input" "!{vcfOut}") args+=("--reference" "!{refSeqPath}") args+=("--output" "!{reportPath}") @@ -59,6 +59,9 @@ report() { if [ -n "!{crams}" ] && [[ "!{includeCrams}" == "true" ]]; then args+=("--cram" "!{crams}") fi + if [ -n "!{bedmethyls}" ] && [[ "!{includeBedMethyls}" == "true" ]]; then + args+=("--bedmethyl" "!{bedmethyls}") + fi ${CMD_VCFREPORT} java "${args[@]}" } diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/0.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/0.conv.bias.tensor new file mode 100644 index 000000000..48e310e7e Binary files /dev/null 
and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/0.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/0.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/0.conv.weight.tensor new file mode 100644 index 000000000..55086400f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/0.conv.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/1.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/1.conv.bias.tensor new file mode 100644 index 000000000..6325ae59f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/1.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/1.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/1.conv.weight.tensor new file mode 100644 index 000000000..cc6244b14 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/1.conv.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/2.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/2.conv.bias.tensor new file mode 100644 index 000000000..a6f5e5eca Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/2.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/2.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/2.conv.weight.tensor new file mode 100644 index 000000000..9f07b7bf4 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/2.conv.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..532f55a7f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.bias_hh_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..144eefb49 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..049c6f5f9 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..5cf8dc745 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/4.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..5b665df71 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..460609ba5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..5137e9bb5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.weight_hh_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..8752f5ad5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/5.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..86e52427c Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..1a3426f89 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..a8a5cec57 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..61475cc68 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/6.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..0c6efb1e1 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.bias_hh_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..a5d841006 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..cff6bc400 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..08dbbdb31 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/7.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..840da77b6 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..1c1f15673 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..640fe3aef Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.weight_hh_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..ecb8de34d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/8.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/9.linear.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/9.linear.weight.tensor new file mode 100644 index 000000000..adc451991 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/9.linear.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/config.toml b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/config.toml new file mode 100644 index 000000000..b7c90bb08 --- /dev/null +++ b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0/config.toml @@ -0,0 +1,120 @@ +[model] +package = "bonito.crf" + +[labels] +labels = [ "N", "A", "C", "G", "T",] + +[input] +features = 1 + +[global_norm] +state_len = 3 + +[run_info] +sample_rate = 5000 + +[encoder] +type = "serial" +[[encoder.sublayers]] +type = "convolution" +insize = 1 +size = 16 +bias = true +winlen = 5 +stride = 1 +padding = 2 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + +[[encoder.sublayers]] +type = "convolution" +insize = 16 +size = 16 +bias = true +winlen = 5 +stride = 1 +padding = 2 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + +[[encoder.sublayers]] +type = "convolution" +insize = 16 +size = 96 +bias = true +winlen = 19 +stride = 6 +padding = 9 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + +[[encoder.sublayers]] +type = "permute" +dims = [ 2, 0, 1,] + +[[encoder.sublayers]] +type = "lstm" +size = 96 +insize = 96 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = 
"lstm" +size = 96 +insize = 96 +bias = true +reverse = 0 + +[[encoder.sublayers]] +type = "lstm" +size = 96 +insize = 96 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = "lstm" +size = 96 +insize = 96 +bias = true +reverse = 0 + +[[encoder.sublayers]] +type = "lstm" +size = 96 +insize = 96 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = "linearcrfencoder" +insize = 96 +n_base = 4 +state_len = 3 +bias = false +blank_score = 2.0 + +[[encoder.sublayers]] +type = "clamp" +min = -5.0 +max = 5.0 + +[qscore] +scale = 0.97 +bias = -1.8 diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/config.toml b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/config.toml new file mode 100644 index 000000000..69274b742 --- /dev/null +++ b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/config.toml @@ -0,0 +1,24 @@ +[general] +creation_date = "05/09/2023, 17:04:15" +model = "conv_lstm" + +[model_params] +size = 128 +kmer_len = 9 +num_out = 3 + +[modbases] +mod_bases = "hm" +offset = 0 +mod_long_names_0 = "5hmC" +mod_long_names_1 = "5mC" +chunk_context_0 = 50 +chunk_context_1 = 50 +kmer_context_bases_0 = 4 +kmer_context_bases_1 = 4 +motif = "CG" +motif_offset = 0 + +[refinement] +refine_do_rough_rescale = 1 +refine_kmer_center_idx = 6 diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor new file mode 100644 index 000000000..bce611fe8 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor new file mode 100644 index 000000000..ec1e41c62 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor differ 
diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor new file mode 100644 index 000000000..b93a4764f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor new file mode 100644 index 000000000..acceecf78 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor new file mode 100644 index 000000000..e26719076 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor new file mode 100644 index 000000000..761941352 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor new file mode 100644 index 000000000..76954a436 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor new file mode 100644 index 000000000..cc0fa8a21 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor new file mode 100644 index 000000000..1127f6448 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor new file mode 100644 index 000000000..2d7ce8f82 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor new file mode 100644 index 000000000..877f8524e Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor new file mode 100644 index 000000000..f80fc7df4 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor new file mode 100644 index 000000000..2905edba3 Binary 
files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor new file mode 100644 index 000000000..6a0c6479f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor new file mode 100644 index 000000000..05838a224 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor new file mode 100644 index 000000000..19ba30986 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor new file mode 100644 index 000000000..0f7be0d78 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor new file mode 100644 index 000000000..0a02887d1 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor new file mode 100644 index 000000000..822f36cc5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor new file mode 100644 index 000000000..1b71bd638 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor new file mode 100644 index 000000000..b65f1970f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor new file mode 100644 index 000000000..5191bae76 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor new file mode 100644 index 000000000..d068040ec Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/0.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/0.conv.bias.tensor new file mode 100644 index 
000000000..04efc8962 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/0.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/0.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/0.conv.weight.tensor new file mode 100644 index 000000000..4ec68b7e0 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/0.conv.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/1.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/1.conv.bias.tensor new file mode 100644 index 000000000..f7f9ff38c Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/1.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/1.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/1.conv.weight.tensor new file mode 100644 index 000000000..4064c8013 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/1.conv.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/10.linear.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/10.linear.weight.tensor new file mode 100644 index 000000000..64aa478d4 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/10.linear.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/2.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/2.conv.bias.tensor new file mode 100644 index 000000000..889d28011 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/2.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/2.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/2.conv.weight.tensor new file mode 100644 index 000000000..9541491b5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/2.conv.weight.tensor differ diff 
--git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..1e8bbac48 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..cfcd9e493 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..1bd53bee4 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..06ff6b0ed Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/4.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..ed853516f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..29bb42644 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.bias_ih_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..69c5ddcfd Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..16e284cc5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/5.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..a4a777a82 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..3cd00aaa0 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..de82588a7 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..0741eb420 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/6.rnn.weight_ih_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..3a05f8d2d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..f23a747c6 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..123ea1033 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..55258ebf7 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/7.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..9a9dc43c1 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..c58461909 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.bias_ih_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..8bb71d02c Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..c003d9960 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/8.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/9.linear.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/9.linear.bias.tensor new file mode 100644 index 000000000..c80a93887 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/9.linear.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/9.linear.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/9.linear.weight.tensor new file mode 100644 index 000000000..e724c9a2b Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/9.linear.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/config.toml b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/config.toml new file mode 100644 index 000000000..e9e6635dc --- /dev/null +++ b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0/config.toml @@ -0,0 +1,125 @@ +[model] +package = "bonito.crf" + +[labels] +labels = [ "N", "A", "C", "G", "T",] + +[input] +features = 1 + +[global_norm] +state_len = 4 + +[run_info] +sample_rate = 5000 + +[encoder] +type = "serial" +[[encoder.sublayers]] +type = "convolution" +insize = 1 +size = 16 +bias = true +winlen = 5 +stride = 1 +padding = 2 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + 
+[[encoder.sublayers]] +type = "convolution" +insize = 16 +size = 16 +bias = true +winlen = 5 +stride = 1 +padding = 2 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + +[[encoder.sublayers]] +type = "convolution" +insize = 16 +size = 384 +bias = true +winlen = 19 +stride = 6 +padding = 9 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + +[[encoder.sublayers]] +type = "permute" +dims = [ 2, 0, 1,] + +[[encoder.sublayers]] +type = "lstm" +size = 384 +insize = 384 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = "lstm" +size = 384 +insize = 384 +bias = true +reverse = 0 + +[[encoder.sublayers]] +type = "lstm" +size = 384 +insize = 384 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = "lstm" +size = 384 +insize = 384 +bias = true +reverse = 0 + +[[encoder.sublayers]] +type = "lstm" +size = 384 +insize = 384 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = "linear" +in_features = 384 +out_features = 128 + +[[encoder.sublayers]] +type = "linearcrfencoder" +insize = 128 +n_base = 4 +state_len = 4 +bias = false +blank_score = 2.0 + +[[encoder.sublayers]] +type = "clamp" +min = -5.0 +max = 5.0 + +[qscore] +scale = 0.95 +bias = -0.2 diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/config.toml b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/config.toml new file mode 100644 index 000000000..2bce69b96 --- /dev/null +++ b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/config.toml @@ -0,0 +1,24 @@ +[general] +creation_date = "05/09/2023, 17:04:18" +model = "conv_lstm" + +[model_params] +size = 256 +kmer_len = 9 +num_out = 3 + +[modbases] +mod_bases = "hm" +offset = 0 +mod_long_names_0 = "5hmC" +mod_long_names_1 = "5mC" +chunk_context_0 = 50 +chunk_context_1 = 50 +kmer_context_bases_0 = 4 +kmer_context_bases_1 = 4 +motif = "CG" +motif_offset = 0 + +[refinement] 
+refine_do_rough_rescale = 1 +refine_kmer_center_idx = 6 diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor new file mode 100644 index 000000000..03f12812d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor new file mode 100644 index 000000000..eab733cbc Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor new file mode 100644 index 000000000..39a841e8b Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor new file mode 100644 index 000000000..f9848d4d7 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor new file mode 100644 index 000000000..044dc9e19 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor new file mode 100644 index 000000000..692e2ef7b Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor new file mode 100644 index 000000000..ff07a7d6e Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor new file mode 100644 index 000000000..7e11cbd04 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor new file mode 100644 index 000000000..b964a01f3 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor new file mode 100644 index 000000000..7c7d70ec9 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor new file mode 100644 index 000000000..728ebd45c Binary files /dev/null and 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor new file mode 100644 index 000000000..023bc13b4 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor new file mode 100644 index 000000000..2905edba3 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor new file mode 100644 index 000000000..97f0328dc Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor new file mode 100644 index 000000000..b7e6ba4fe Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor new file mode 100644 index 000000000..b924e3d5d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor new file mode 100644 index 000000000..16baf8c64 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor new file mode 100644 index 000000000..26cc4c347 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor new file mode 100644 index 000000000..99c4094d9 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor new file mode 100644 index 000000000..ad2c5848b Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor new file mode 100644 index 000000000..4efc583f6 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor new file mode 100644 
index 000000000..d8ccbd254 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor new file mode 100644 index 000000000..b53bff8ea Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/0.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/0.conv.bias.tensor new file mode 100644 index 000000000..f2ee152c4 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/0.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/0.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/0.conv.weight.tensor new file mode 100644 index 000000000..ba3c47450 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/0.conv.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/1.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/1.conv.bias.tensor new file mode 100644 index 000000000..c5515b389 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/1.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/1.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/1.conv.weight.tensor new file mode 100644 index 000000000..261b8d771 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/1.conv.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/10.linear.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/10.linear.weight.tensor new file mode 100644 index 000000000..b72efde2a Binary files /dev/null and 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/10.linear.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/2.conv.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/2.conv.bias.tensor new file mode 100644 index 000000000..772acdf7e Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/2.conv.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/2.conv.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/2.conv.weight.tensor new file mode 100644 index 000000000..ddd6b292d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/2.conv.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..7a3560d0d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..41ab8a1f4 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..66bf2647c Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..1c384dac5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/4.rnn.weight_ih_l0.tensor differ 
diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..b92ae9892 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..9445f2774 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..710ab8a52 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..50574c411 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/5.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..12cc9c055 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..332799c6b Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.bias_ih_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..4bdf130aa Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..b904361b7 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/6.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..b464eb871 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..f659c20ad Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..eadc62e93 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..c8ec89469 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/7.rnn.weight_ih_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.bias_hh_l0.tensor new file mode 100644 index 000000000..4f3da976d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.bias_ih_l0.tensor new file mode 100644 index 000000000..2452cf3d0 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.weight_hh_l0.tensor new file mode 100644 index 000000000..f28ea85b8 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.weight_ih_l0.tensor new file mode 100644 index 000000000..1d8bdac3d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/8.rnn.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/9.linear.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/9.linear.bias.tensor new file mode 100644 index 000000000..bd413453d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/9.linear.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/9.linear.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/9.linear.weight.tensor new file mode 100644 index 000000000..78ce0894d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/9.linear.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/config.toml 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/config.toml new file mode 100644 index 000000000..5b334147c --- /dev/null +++ b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0/config.toml @@ -0,0 +1,125 @@ +[model] +package = "bonito.crf" + +[labels] +labels = [ "N", "A", "C", "G", "T",] + +[input] +features = 1 + +[global_norm] +state_len = 5 + +[run_info] +sample_rate = 5000 + +[encoder] +type = "serial" +[[encoder.sublayers]] +type = "convolution" +insize = 1 +size = 16 +bias = true +winlen = 5 +stride = 1 +padding = 2 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + +[[encoder.sublayers]] +type = "convolution" +insize = 16 +size = 16 +bias = true +winlen = 5 +stride = 1 +padding = 2 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + +[[encoder.sublayers]] +type = "convolution" +insize = 16 +size = 1024 +bias = true +winlen = 19 +stride = 6 +padding = 9 +activation = "swish" +norm = "batchnorm" + +[[encoder.sublayers]] +type = "clamp" +min = -0.5 +max = 3.5 + +[[encoder.sublayers]] +type = "permute" +dims = [ 2, 0, 1,] + +[[encoder.sublayers]] +type = "lstm" +size = 1024 +insize = 1024 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = "lstm" +size = 1024 +insize = 1024 +bias = true +reverse = 0 + +[[encoder.sublayers]] +type = "lstm" +size = 1024 +insize = 1024 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = "lstm" +size = 1024 +insize = 1024 +bias = true +reverse = 0 + +[[encoder.sublayers]] +type = "lstm" +size = 1024 +insize = 1024 +bias = true +reverse = 1 + +[[encoder.sublayers]] +type = "linear" +in_features = 1024 +out_features = 256 + +[[encoder.sublayers]] +type = "linearcrfencoder" +insize = 256 +n_base = 4 +state_len = 5 +bias = false +blank_score = 2.0 + +[[encoder.sublayers]] +type = "clamp" +min = -5.0 +max = 5.0 + +[qscore] +scale = 0.95 +bias = 0.5 diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/config.toml b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/config.toml new file mode 100644 index 000000000..5570082f9 --- /dev/null +++ b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/config.toml @@ -0,0 +1,24 @@ +[general] +creation_date = "05/09/2023, 00:10:01" +model = "conv_lstm" + +[model_params] +size = 128 +kmer_len = 9 +num_out = 2 + +[modbases] +mod_bases = "m" +offset = 0 +reverse_signal = false +mod_long_names_0 = "5mC" +chunk_context_0 = 50 +chunk_context_1 = 50 +kmer_context_bases_0 = 4 +kmer_context_bases_1 = 4 +motif = "C" +motif_offset = 0 + +[refinement] +refine_do_rough_rescale = 1 +refine_kmer_center_idx = 6 diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/fc.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/fc.bias.tensor new file mode 100644 index 000000000..708da592f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/fc.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/fc.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/fc.weight.tensor new file mode 100644 index 000000000..e5334cc5f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/fc.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.bias_hh_l0.tensor new file mode 100644 index 000000000..3c1865dab Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.bias_ih_l0.tensor new file mode 100644 index 000000000..38219b3fc Binary files /dev/null and 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.weight_hh_l0.tensor new file mode 100644 index 000000000..1fd1c82ba Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.weight_ih_l0.tensor new file mode 100644 index 000000000..0624071ca Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm1.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.bias_hh_l0.tensor new file mode 100644 index 000000000..587050596 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.bias_ih_l0.tensor new file mode 100644 index 000000000..4eb90dd29 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.weight_hh_l0.tensor new file mode 100644 index 000000000..aa9b7884e Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.weight_ih_l0.tensor 
new file mode 100644 index 000000000..9c0b6aab9 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/lstm2.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/merge_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/merge_conv1.bias.tensor new file mode 100644 index 000000000..c38d94e50 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/merge_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/merge_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/merge_conv1.weight.tensor new file mode 100644 index 000000000..eacbdf24c Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/merge_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/refine_kmer_levels.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/refine_kmer_levels.tensor new file mode 100644 index 000000000..2905edba3 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/refine_kmer_levels.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv1.bias.tensor new file mode 100644 index 000000000..14e196504 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv1.weight.tensor new file mode 100644 index 000000000..520cff2bb Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv2.bias.tensor 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv2.bias.tensor new file mode 100644 index 000000000..e680420d6 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv2.weight.tensor new file mode 100644 index 000000000..a5e4febe1 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/seq_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv1.bias.tensor new file mode 100644 index 000000000..cff8519ed Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv1.weight.tensor new file mode 100644 index 000000000..025cb245e Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv2.bias.tensor new file mode 100644 index 000000000..0c3c108b9 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv2.weight.tensor new file mode 100644 index 000000000..f66ce5488 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv2.weight.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv3.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv3.bias.tensor new file mode 100644 index 000000000..9337f19cd Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv3.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv3.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv3.weight.tensor new file mode 100644 index 000000000..45253d641 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2/sig_conv3.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/config.toml b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/config.toml new file mode 100644 index 000000000..e5a4a45e5 --- /dev/null +++ b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/config.toml @@ -0,0 +1,24 @@ +[general] +creation_date = "05/09/2023, 17:04:21" +model = "conv_lstm" + +[model_params] +size = 256 +kmer_len = 9 +num_out = 3 + +[modbases] +mod_bases = "hm" +offset = 0 +mod_long_names_0 = "5hmC" +mod_long_names_1 = "5mC" +chunk_context_0 = 50 +chunk_context_1 = 50 +kmer_context_bases_0 = 4 +kmer_context_bases_1 = 4 +motif = "CG" +motif_offset = 0 + +[refinement] +refine_do_rough_rescale = 1 +refine_kmer_center_idx = 6 diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor new file mode 100644 index 000000000..206846d1f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/fc.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor new file mode 100644 index 000000000..d5e3e26ea Binary files 
/dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/fc.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor new file mode 100644 index 000000000..21074fe46 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor new file mode 100644 index 000000000..1c9b39ce0 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor new file mode 100644 index 000000000..8aa98d9c7 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor new file mode 100644 index 000000000..44922a82f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm1.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor new file mode 100644 index 000000000..bc76d2416 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_hh_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor new file mode 100644 index 000000000..83eb0f0a0 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor new file mode 100644 index 000000000..f0ba752ed Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor new file mode 100644 index 000000000..909a02db7 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/lstm2.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor new file mode 100644 index 000000000..1261aa55d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/merge_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor new file mode 100644 index 000000000..9eb99cc62 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/merge_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor new file mode 100644 index 000000000..2905edba3 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/refine_kmer_levels.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor new file mode 100644 index 000000000..5c76fbc02 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor new file mode 100644 index 000000000..7068dace5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor new file mode 100644 index 000000000..2ea1925b2 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor new file mode 100644 index 000000000..2b8c83adf Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/seq_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor new file mode 100644 index 000000000..c39258551 Binary files /dev/null and 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor new file mode 100644 index 000000000..107f5ffa5 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor new file mode 100644 index 000000000..cee65a130 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor new file mode 100644 index 000000000..e84d31a0c Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor new file mode 100644 index 000000000..c5691413f Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv3.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor new file mode 100644 index 000000000..91311dc33 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2/sig_conv3.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/config.toml 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/config.toml new file mode 100644 index 000000000..d612027b9 --- /dev/null +++ b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/config.toml @@ -0,0 +1,23 @@ +[general] +creation_date = "04/21/2023, 13:53:36" +model = "conv_lstm" + +[model_params] +size = 256 +kmer_len = 9 +num_out = 2 + +[modbases] +mod_bases = "a" +offset = 0 +mod_long_names_0 = "6mA" +chunk_context_0 = 100 +chunk_context_1 = 100 +kmer_context_bases_0 = 4 +kmer_context_bases_1 = 4 +motif = "A" +motif_offset = 0 + +[refinement] +refine_do_rough_rescale = 1 +refine_kmer_center_idx = 6 diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/fc.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/fc.bias.tensor new file mode 100644 index 000000000..f95def066 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/fc.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/fc.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/fc.weight.tensor new file mode 100644 index 000000000..3746d253a Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/fc.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.bias_hh_l0.tensor new file mode 100644 index 000000000..93c3fb150 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.bias_ih_l0.tensor new file mode 100644 index 000000000..6c791caeb Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.bias_ih_l0.tensor differ diff --git 
a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.weight_hh_l0.tensor new file mode 100644 index 000000000..7f5b32dd4 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.weight_ih_l0.tensor new file mode 100644 index 000000000..f0234a3fc Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm1.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.bias_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.bias_hh_l0.tensor new file mode 100644 index 000000000..2adce0d09 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.bias_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.bias_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.bias_ih_l0.tensor new file mode 100644 index 000000000..1f282e1bf Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.bias_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.weight_hh_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.weight_hh_l0.tensor new file mode 100644 index 000000000..567efbda3 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.weight_hh_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.weight_ih_l0.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.weight_ih_l0.tensor new file mode 100644 index 000000000..c2cc88850 Binary files /dev/null and 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/lstm2.weight_ih_l0.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/merge_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/merge_conv1.bias.tensor new file mode 100644 index 000000000..d50d1874c Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/merge_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/merge_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/merge_conv1.weight.tensor new file mode 100644 index 000000000..25e84da70 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/merge_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/refine_kmer_levels.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/refine_kmer_levels.tensor new file mode 100644 index 000000000..2905edba3 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/refine_kmer_levels.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv1.bias.tensor new file mode 100644 index 000000000..639f41efe Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv1.weight.tensor new file mode 100644 index 000000000..4ae42eaad Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv2.bias.tensor new file mode 
100644 index 000000000..090c5ef78 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv2.weight.tensor new file mode 100644 index 000000000..fa4ff0301 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/seq_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv1.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv1.bias.tensor new file mode 100644 index 000000000..ce8ea7a41 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv1.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv1.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv1.weight.tensor new file mode 100644 index 000000000..199ebae8d Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv1.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv2.bias.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv2.bias.tensor new file mode 100644 index 000000000..169d9e6ee Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv2.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv2.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv2.weight.tensor new file mode 100644 index 000000000..fe7dbaedc Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv2.weight.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv3.bias.tensor 
b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv3.bias.tensor new file mode 100644 index 000000000..c702359bf Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv3.bias.tensor differ diff --git a/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv3.weight.tensor b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv3.weight.tensor new file mode 100644 index 000000000..7904c9386 Binary files /dev/null and b/resources/pod5/dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2/sig_conv3.weight.tensor differ diff --git a/resources/pod5/pod5-vcf-report.jar b/resources/pod5/pod5-vcf-report.jar new file mode 100644 index 000000000..3bb4bb272 Binary files /dev/null and b/resources/pod5/pod5-vcf-report.jar differ diff --git a/resources/pod5/pod5_template.html b/resources/pod5/pod5_template.html new file mode 100644 index 000000000..1c4a5f352 --- /dev/null +++ b/resources/pod5/pod5_template.html @@ -0,0 +1,14 @@ + + + + + + + VCF Report + + + + + + + diff --git a/test/suites/pod5/hg001_giab_2023.05.sh b/test/suites/pod5/hg001_giab_2023.05.sh new file mode 100644 index 000000000..29160fb38 --- /dev/null +++ b/test/suites/pod5/hg001_giab_2023.05.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -euo pipefail + +# shellcheck disable=SC1091 +source "${TEST_UTILS_DIR}/utils.sh" + + if ! 
aws s3 cp help &> /dev/null + then + echo "command 'aws' could not be found (possible solution: run 'ml awscli' before executing this script)" + exit 1 + fi + +aws s3 cp --region eu-west-1 --no-sign-request s3://ont-open-data/giab_2023.05/flowcells/hg001/20230505_1857_1B_PAO99309_94e07fab/pod5_pass/ "${TEST_RESOURCES_DIR}/downloads/" --recursive --exclude "PAO99309_pass__94e07fab_c3641428_*" --include "PAO99309_pass__94e07fab_c3641428_9*" + +args=() +args+=("--workflow" "pod5") +args+=("--input" "${TEST_RESOURCES_DIR}/hg001_giab_2023_05.tsv") +args+=("--config" "${TEST_RESOURCES_DIR}/hg001_giab_2023_05.cfg") +args+=("--output" "${OUTPUT_DIR}") +args+=("--resume") + +vip "${args[@]}" 1> /dev/null + +# compare expected to actual output and store result +if [ "$(zcat "${OUTPUT_DIR}/vip.vcf.gz" | grep -vc "^#")" -gt 0 ]; then + result="0" +else + result="1" +fi +echo -n "${result}" > "${OUTPUT_DIR}/.exitcode" + +# always exit with success error code +exit 0 \ No newline at end of file diff --git a/test/suites/pod5/resources/hg001_giab_2023_05.cfg b/test/suites/pod5/resources/hg001_giab_2023_05.cfg new file mode 100644 index 000000000..aa6367807 --- /dev/null +++ b/test/suites/pod5/resources/hg001_giab_2023_05.cfg @@ -0,0 +1,4 @@ +params { + cram.call_str=false + cram.call_sv=false +} \ No newline at end of file diff --git a/test/suites/pod5/resources/hg001_giab_2023_05.tsv b/test/suites/pod5/resources/hg001_giab_2023_05.tsv new file mode 100644 index 000000000..967105d29 --- /dev/null +++ b/test/suites/pod5/resources/hg001_giab_2023_05.tsv @@ -0,0 +1,2 @@ +individual_id affected sequencing_platform pod5 +hg001_giab_2023_05 true nanopore 
downloads/PAO99309_pass__94e07fab_c3641428_957.pod5,downloads/PAO99309_pass__94e07fab_c3641428_99.pod5,downloads/PAO99309_pass__94e07fab_c3641428_933.pod5,downloads/PAO99309_pass__94e07fab_c3641428_969.pod5,downloads/PAO99309_pass__94e07fab_c3641428_928.pod5,downloads/PAO99309_pass__94e07fab_c3641428_964.pod5,downloads/PAO99309_pass__94e07fab_c3641428_907.pod5,downloads/PAO99309_pass__94e07fab_c3641428_968.pod5,downloads/PAO99309_pass__94e07fab_c3641428_903.pod5,downloads/PAO99309_pass__94e07fab_c3641428_978.pod5,downloads/PAO99309_pass__94e07fab_c3641428_997.pod5,downloads/PAO99309_pass__94e07fab_c3641428_979.pod5,downloads/PAO99309_pass__94e07fab_c3641428_951.pod5,downloads/PAO99309_pass__94e07fab_c3641428_940.pod5,downloads/PAO99309_pass__94e07fab_c3641428_985.pod5,downloads/PAO99309_pass__94e07fab_c3641428_938.pod5,downloads/PAO99309_pass__94e07fab_c3641428_902.pod5,downloads/PAO99309_pass__94e07fab_c3641428_942.pod5,downloads/PAO99309_pass__94e07fab_c3641428_904.pod5,downloads/PAO99309_pass__94e07fab_c3641428_972.pod5,downloads/PAO99309_pass__94e07fab_c3641428_982.pod5,downloads/PAO99309_pass__94e07fab_c3641428_959.pod5,downloads/PAO99309_pass__94e07fab_c3641428_950.pod5,downloads/PAO99309_pass__94e07fab_c3641428_943.pod5,downloads/PAO99309_pass__94e07fab_c3641428_991.pod5,downloads/PAO99309_pass__94e07fab_c3641428_994.pod5,downloads/PAO99309_pass__94e07fab_c3641428_91.pod5,downloads/PAO99309_pass__94e07fab_c3641428_987.pod5,downloads/PAO99309_pass__94e07fab_c3641428_944.pod5,downloads/PAO99309_pass__94e07fab_c3641428_912.pod5,downloads/PAO99309_pass__94e07fab_c3641428_995.pod5,downloads/PAO99309_pass__94e07fab_c3641428_974.pod5,downloads/PAO99309_pass__94e07fab_c3641428_923.pod5,downloads/PAO99309_pass__94e07fab_c3641428_949.pod5,downloads/PAO99309_pass__94e07fab_c3641428_934.pod5,downloads/PAO99309_pass__94e07fab_c3641428_958.pod5,downloads/PAO99309_pass__94e07fab_c3641428_989.pod5,downloads/PAO99309_pass__94e07fab_c3641428_935.pod5,downloads/PAO99309_pass__9
4e07fab_c3641428_986.pod5,downloads/PAO99309_pass__94e07fab_c3641428_919.pod5,downloads/PAO99309_pass__94e07fab_c3641428_922.pod5,downloads/PAO99309_pass__94e07fab_c3641428_998.pod5,downloads/PAO99309_pass__94e07fab_c3641428_954.pod5,downloads/PAO99309_pass__94e07fab_c3641428_981.pod5,downloads/PAO99309_pass__94e07fab_c3641428_946.pod5,downloads/PAO99309_pass__94e07fab_c3641428_976.pod5,downloads/PAO99309_pass__94e07fab_c3641428_921.pod5,downloads/PAO99309_pass__94e07fab_c3641428_900.pod5,downloads/PAO99309_pass__94e07fab_c3641428_965.pod5,downloads/PAO99309_pass__94e07fab_c3641428_9.pod5,downloads/PAO99309_pass__94e07fab_c3641428_96.pod5,downloads/PAO99309_pass__94e07fab_c3641428_92.pod5,downloads/PAO99309_pass__94e07fab_c3641428_94.pod5,downloads/PAO99309_pass__94e07fab_c3641428_936.pod5,downloads/PAO99309_pass__94e07fab_c3641428_911.pod5,downloads/PAO99309_pass__94e07fab_c3641428_983.pod5,downloads/PAO99309_pass__94e07fab_c3641428_95.pod5,downloads/PAO99309_pass__94e07fab_c3641428_910.pod5,downloads/PAO99309_pass__94e07fab_c3641428_956.pod5,downloads/PAO99309_pass__94e07fab_c3641428_973.pod5,downloads/PAO99309_pass__94e07fab_c3641428_947.pod5,downloads/PAO99309_pass__94e07fab_c3641428_992.pod5,downloads/PAO99309_pass__94e07fab_c3641428_924.pod5,downloads/PAO99309_pass__94e07fab_c3641428_914.pod5,downloads/PAO99309_pass__94e07fab_c3641428_967.pod5,downloads/PAO99309_pass__94e07fab_c3641428_918.pod5,downloads/PAO99309_pass__94e07fab_c3641428_916.pod5,downloads/PAO99309_pass__94e07fab_c3641428_980.pod5,downloads/PAO99309_pass__94e07fab_c3641428_909.pod5,downloads/PAO99309_pass__94e07fab_c3641428_960.pod5,downloads/PAO99309_pass__94e07fab_c3641428_908.pod5,downloads/PAO99309_pass__94e07fab_c3641428_917.pod5,downloads/PAO99309_pass__94e07fab_c3641428_966.pod5,downloads/PAO99309_pass__94e07fab_c3641428_961.pod5,downloads/PAO99309_pass__94e07fab_c3641428_945.pod5,downloads/PAO99309_pass__94e07fab_c3641428_915.pod5,downloads/PAO99309_pass__94e07fab_c3641428_97.pod5,downl
oads/PAO99309_pass__94e07fab_c3641428_988.pod5,downloads/PAO99309_pass__94e07fab_c3641428_913.pod5,downloads/PAO99309_pass__94e07fab_c3641428_939.pod5,downloads/PAO99309_pass__94e07fab_c3641428_937.pod5,downloads/PAO99309_pass__94e07fab_c3641428_927.pod5,downloads/PAO99309_pass__94e07fab_c3641428_98.pod5,downloads/PAO99309_pass__94e07fab_c3641428_925.pod5,downloads/PAO99309_pass__94e07fab_c3641428_962.pod5,downloads/PAO99309_pass__94e07fab_c3641428_990.pod5,downloads/PAO99309_pass__94e07fab_c3641428_955.pod5,downloads/PAO99309_pass__94e07fab_c3641428_931.pod5,downloads/PAO99309_pass__94e07fab_c3641428_941.pod5,downloads/PAO99309_pass__94e07fab_c3641428_971.pod5,downloads/PAO99309_pass__94e07fab_c3641428_920.pod5,downloads/PAO99309_pass__94e07fab_c3641428_977.pod5,downloads/PAO99309_pass__94e07fab_c3641428_905.pod5,downloads/PAO99309_pass__94e07fab_c3641428_993.pod5,downloads/PAO99309_pass__94e07fab_c3641428_996.pod5,downloads/PAO99309_pass__94e07fab_c3641428_93.pod5,downloads/PAO99309_pass__94e07fab_c3641428_901.pod5,downloads/PAO99309_pass__94e07fab_c3641428_970.pod5,downloads/PAO99309_pass__94e07fab_c3641428_930.pod5,downloads/PAO99309_pass__94e07fab_c3641428_906.pod5,downloads/PAO99309_pass__94e07fab_c3641428_953.pod5,downloads/PAO99309_pass__94e07fab_c3641428_963.pod5,downloads/PAO99309_pass__94e07fab_c3641428_975.pod5,downloads/PAO99309_pass__94e07fab_c3641428_999.pod5,downloads/PAO99309_pass__94e07fab_c3641428_932.pod5,downloads/PAO99309_pass__94e07fab_c3641428_948.pod5,downloads/PAO99309_pass__94e07fab_c3641428_926.pod5,downloads/PAO99309_pass__94e07fab_c3641428_984.pod5,downloads/PAO99309_pass__94e07fab_c3641428_952.pod5,downloads/PAO99309_pass__94e07fab_c3641428_90.pod5,downloads/PAO99309_pass__94e07fab_c3641428_929.pod5 \ No newline at end of file diff --git a/test/test.sh b/test/test.sh index 895fce8db..bbb2271a8 100644 --- a/test/test.sh +++ b/test/test.sh @@ -119,7 +119,7 @@ run() { sbatch_args+=("--job-name=vip_test") sbatch_args+=("--time=${time}") 
sbatch_args+=("--cpus-per-task=1") - sbatch_args+=("--mem=1gb") + sbatch_args+=("--mem=10gb") sbatch_args+=("--nodes=1") sbatch_args+=("--open-mode=append") sbatch_args+=("--export=PATH=${vip_dir}:${PATH},VIP_DIR=${vip_dir},TMPDIR=${test_output_dir}/tmp,NXF_HOME=${nextflow_home_dir},NXF_TEMP=${test_nextflow_temp_dir},NXF_WORK=${test_nextflow_work_dir},OUTPUT_DIR=${test_output_dir},TEST_RESOURCES_DIR=${test_resources_dir},TEST_UTILS_DIR=${SCRIPT_DIR}") @@ -281,7 +281,7 @@ main() { exit 2 fi - local test="cram,fastq,gvcf,vcf" + local test="cram,fastq,gvcf,vcf,pod5" local clean="false" eval set -- "${args}" diff --git a/utils/apptainer/build.sh b/utils/apptainer/build.sh index 4fcce935e..c4ab0fdae 100644 --- a/utils/apptainer/build.sh +++ b/utils/apptainer/build.sh @@ -108,6 +108,10 @@ main() { uris["docker://ensemblorg/ensembl-vep:release_109.3"]="vep-109.3" uris["docker://google/deepvariant:1.6.0"]="deepvariant-1.6.0" uris["docker://google/deepvariant:deeptrio-1.6.0"]="deepvariant_deeptrio-1.6.0" + # Modkit --version 0.1.13 + uris["docker://ontresearch/modkit:sha3745cd8f97213eaf908f5fbf4f2f8b8e2cedfc30"]="modkit-sha3745cd8f97213eaf908f5fbf4f2f8b8e2cedfc30" + # Dorado --version 5.2.0 + uris["docker://ontresearch/dorado:shac28cd94f2303b0493a4b16ca86e711852c2b8525"]="dorado-shac28cd94f2303b0493a4b16ca86e711852c2b8525" for i in "${!uris[@]}"; do echo "---Building from URI ${i}---" diff --git a/vip.sh b/vip.sh index b52bcaea6..a59509d89 100755 --- a/vip.sh +++ b/vip.sh @@ -9,7 +9,7 @@ VIP_DIR="${VIP_DIR:-"${SCRIPT_DIR}"}" usage() { echo -e "usage: ${SCRIPT_NAME} [-w -i -o ] - -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf + -w, --workflow workflow to execute. allowed values: cram, fastq, gvcf, vcf, pod5 -i, --input path to sample sheet .tsv -o, --output output folder -c, --config path to additional nextflow .cfg (optional) @@ -33,8 +33,8 @@ validate() { usage exit 2 fi - if [[ ! 
"${workflow}" =~ cram|fastq|gvcf|vcf ]]; then - >&2 echo -e "error: workflow '${workflow}'. allowed values are [cram, fastq, gvcf, vcf]" + if [[ ! "${workflow}" =~ cram|fastq|gvcf|vcf|pod5 ]]; then + >&2 echo -e "error: workflow '${workflow}'. allowed values are [cram, fastq, gvcf, vcf, pod5]" usage exit 2 fi @@ -44,7 +44,7 @@ validate() { usage exit 2 fi - if [[ ! -f "${input}" ]]; then + if [[ ! -f "${input}" ]] ; then >&2 echo -e "error: input '${input}' does not exist" exit 2 fi diff --git a/vip_pod5.nf b/vip_pod5.nf new file mode 100644 index 000000000..d0de9c771 --- /dev/null +++ b/vip_pod5.nf @@ -0,0 +1,94 @@ +nextflow.enable.dsl=2 + +// Modules to include +include { parseCommonSampleSheet; getAssemblies } from './modules/sample_sheet' +include { validateGroup } from './modules/utils' +include { dorado } from './modules/pod5/dorado' +include { sort_bam } from './modules/pod5/samtools' +include { modkit } from './modules/pod5/modkit' +include { cram; validateCramParams } from './vip_cram' + +workflow pod5 { + // Base modification workflow + take: meta + main: + meta + | branch { meta -> + pod5_data: !meta.sample.pod5.isEmpty() + ready: true + } + | set { ch_input } + + ch_input.pod5_data + | map { meta -> [*:meta, sample:[*:meta.sample, pod5:meta.sample.pod5] ] } + | set {ch_input_ready} + + // Basecalling using Dorado + ch_input_ready + | map { meta -> [ meta, meta.sample.pod5]} + | dorado + | map { meta, bam -> [*:meta, sample: [*:meta.sample, bam: bam]] } + | set {ch_basecalled} + + // Sorting output bam files from Dorado + ch_basecalled + | map { meta -> [ meta, meta.sample.bam ] } + | sort_bam + | map { meta, sortedBam, sortedBamIndex, sortedBamStats -> [*:meta, sample: [*:meta.sample, cram: sortedBam, cramIndex: sortedBamIndex, cramStats: sortedBamStats]] } + | set {ch_basecalled_sorted} + + // Processing bam files by modkit + ch_basecalled_sorted + | map { meta -> [ meta, meta.sample.cram, meta.sample.cramIndex ]} + | modkit + | map { meta, bedmethyl 
-> [ *:meta, sample: [*:meta.sample, bedmethyl: bedmethyl]]} + | set { ch_bedmethyl } + + ch_bedmethyl + | map { meta -> [*:meta, project: [*:meta.project, assembly: params.assembly], sample: [*:meta.sample, cram: [data: meta.sample.cram, index: meta.sample.cramIndex, stats: meta.sample.cramStats]]] } + | cram + +} + +workflow { + // Main workflow + def projects = parseSampleSheet(params.input) + def assemblies = getAssemblies(projects) + validatePod5Params(assemblies) + + Channel.from(projects) + | flatMap { project -> project.samples.collect { sample -> [project: project, sample: sample] } } + | pod5 +} + +def validatePod5Params(assemblies) { + def doradoModel = params.dorado_model + if(!file(doradoModel).isDirectory()) exit 1, "parameter 'params.dorado_model' value '${doradoModel}' is not a directory" +} + +def parseSampleSheet(csvFile){ + // Parse sample sheet: check for pod5 files + + def pod5Regex = /.+\.(pod5)(\.gz)?/ + + def cols = [ + pod5: [ + type: "file", + list: true, + required: true, + regex: pod5Regex + ], + region: [ + type: "string" + ], + sequencing_platform: [ + type: "string", + default: { 'nanopore' }, + enum: ['illumina', 'nanopore', 'pacbio_hifi'], + scope: "project" + ] + ] + + return parseCommonSampleSheet(csvFile, cols) +} + diff --git a/vip_vcf.nf b/vip_vcf.nf index b85410218..61c942278 100644 --- a/vip_vcf.nf +++ b/vip_vcf.nf @@ -200,7 +200,7 @@ workflow vcf { ready: true } | set { ch_concated } - + ch_outputs.ready | map { meta, vcfs, vcfIndexes -> [*:meta, vcf: vcfs.first(), vcf_index: vcfIndexes.first()] } | set { ch_output_singleton } @@ -213,20 +213,25 @@ workflow vcf { | set { ch_output } ch_output.slice - | flatMap { meta -> meta.project.samples.findAll{ sample -> sample.cram != null }.collect{ sample -> [*:meta, sample: sample] } } + | flatMap { meta -> meta.project.samples.findAll{ sample -> sample.cram != null || sample.bedmethyl != null}.collect{ sample -> [*:meta, sample: sample] } } | map { meta -> [meta, meta.vcf, 
meta.vcf_index, meta.sample.cram.data] } | slice | map { meta, cram -> [*:meta, cram: cram] } - | map { meta -> [groupKey(meta.project.id, meta.project.samples.count{ sample -> sample.cram != null }), meta] } + | map { meta -> [groupKey(meta.project.id, meta.project.samples.count{ sample -> sample.cram != null || sample.bedmethyl != null }), meta] } | groupTuple(remainder: true) | map { key, metaList -> - def meta = [*:metaList.first()].findAll { it.key != 'sample' && it.key != 'cram' } - [*:meta, crams: metaList.collect { [family_id: it.sample.family_id, individual_id: it.sample.individual_id, cram: it.cram] } ] + def meta = [*:metaList.first()].findAll { it.key != 'sample' && it.key != 'cram' && it.key != 'bedmethyl'} + [*:meta, crams: metaList.collect { [family_id: it.sample.family_id, individual_id: it.sample.individual_id, cram: it.cram] }, bedmethyls: metaList.collect { [family_id: it.sample.family_id, individual_id: it.sample.individual_id, bedmethyl: it.sample.bedmethyl] } ] } | set { ch_sliced } - + ch_sliced.mix(ch_output.ready) - | map { meta -> [meta, meta.vcf, meta.vcf_index, meta.crams ? meta.crams.collect { it.cram } : []] } + | map { meta -> + def bedmethylPaths = meta.bedmethyls ? meta.bedmethyls.collect { it.bedmethyl } : [] + bedmethylPaths = bedmethylPaths.findAll { it != null} + + [meta, meta.vcf, meta.vcf_index, meta.crams ? 
meta.crams.collect { it.cram } : [], bedmethylPaths] + } | report } @@ -238,7 +243,7 @@ workflow { // preprocess vcfs and crams in parallel Channel.from(projects) | map { project -> [project: project] } - | multiMap { it -> vcf: cram: it } + | multiMap { it -> vcf: cram: bedmethyl: it } | set { ch_project } // validate and liftover vcf per project @@ -289,11 +294,11 @@ workflow { | map { meta, containers -> [meta, [samples: containers.collect { [*:it.sample, cram: it.cram] }]] } | set { ch_project_cram_processed } - // merge vcf and cram channels and update project + // merge vcf, cram, and bedmethyl channels and update project Channel.empty().mix(ch_project_vcf_processed, ch_project_cram_processed) | map { meta, container -> [groupKey(meta, 2), container] } | groupTuple(remainder: true) - | map { key, group -> validateGroup(key, group) } + | map { key, group -> validateGroup(key, group) } | map { meta, containers -> def vcf = containers.find { it.vcf != null }.vcf def samples = containers.find { it.samples != null }.samples @@ -369,9 +374,16 @@ def validateVcfParams(inputAssemblies) { def includeCrams = params.vcf.report.include_crams if (!(includeCrams ==~ /true|false/)) exit 1, "parameter 'params.vcf.report.include_crams' value '${includeCrams}' is invalid. allowed values are [true, false]" + def includeBedmethyls = params.vcf.report.include_bedmethyls + if (!(includeBedmethyls ==~ /true|false/)) exit 1, "parameter 'params.vcf.report.include_bedmethyls' value '${includeBedmethyls}' is invalid. 
allowed values are [true, false]" + + def template = params.vcf.report.template if(!template.isEmpty() && !file(template).exists() ) exit 1, "parameter 'vcf.report.template' value '${template}' does not exist" + def vcf_report_jar = params.vcf.report.vcf_report_jar + if(!vcf_report_jar.isEmpty() && !file(vcf_report_jar).exists() ) exit 1, "parameter 'vcf.report.vcf_report_jar' value '${vcf_report_jar}' does not exist" + outputAssemblies.each { assembly -> def genes = params.vcf.report[assembly].genes if(!file(genes).exists() ) exit 1, "parameter 'vcf.report.${assembly}.genes' value '${genes}' does not exist" @@ -379,6 +391,8 @@ def validateVcfParams(inputAssemblies) { } def parseSampleSheet(csvFile) { + def bedmethylRegex = /.+\.(bedmethyl)?/ + def cols = [ assembly: [ type: "string", @@ -395,6 +409,10 @@ def parseSampleSheet(csvFile) { cram: [ type: "file", regex: getCramRegex() + ], + bedmethyl: [ + type: "file", + regex: bedmethylRegex ] ]