From ea46be354bdd85fbfb17fefdebeaa04f16d68a82 Mon Sep 17 00:00:00 2001
From: Ivan Blagoev Topolsky <ivan.topolsky@bsse.ethz.ch>
Date: Fri, 7 Jun 2024 11:07:44 +0200
Subject: [PATCH] Keep the linter happy

---
 .../resources/local_haplotype_setup/README.md | 32 ++++----
 .../performance_measures_indels_freqs.py      | 28 +++++--
 .../scripts/performance_measures_runtime.py   |  1 +
 .../performance_measures_snvs_freqs.py        | 55 ++++++++++----
 .../performance_measures_snvs_viloca.py       | 52 ++++++++-----
 .../cliquesnv_local_haplo.py                  |  2 +-
 .../cliquesnv_local_haplo_tf0.001.py          |  2 +-
 .../cliquesnv_local_haplo_tf0.01.py           | 44 +++++------
 .../cliquesnv_local_haplo_tf0.1.py            | 42 +++++------
 .../predicthaplo_local_haplo.py               | 73 +++++++++++--------
 .../method_definitions/shorah_default.py      |  6 +-
 .../method_definitions/strainline.py          | 25 +++----
 .../viloca_alpha_0.00001.py                   | 33 +++++++--
 .../method_definitions/viloca_envp.py         |  8 +-
 .../method_definitions/viloca_envp_0.001.py   | 10 +--
 .../scripts/bed_file_single_amplicon.py       |  2 +-
 .../workflow/scripts/generate_haplotypes.py   | 50 +++++++++----
 .../scripts/performance_measures_global.py    |  1 -
 .../scripts/performance_measures_local.py     |  2 +-
 .../workflow/scripts/shotgun_simulation.py    | 51 ++++++++-----
 workflow/scripts/file_parser.py               |  2 +-
 21 files changed, 321 insertions(+), 200 deletions(-)

diff --git a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/README.md b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/README.md
index 1bd3ff6d..01461f08 100644
--- a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/README.md
+++ b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/README.md
@@ -1,27 +1,29 @@
+# Local haplotype reconstruction benchmark 
+
 This repository stores the scripts and notebooks used to conduct the benchmark study presented in the manuscript: XXX
 
 To reproduce the benchmark study and create the figures presented in the manuscript, use the following instructions:
 1. Clone the repository of V-pipe 3.0 into your working directory:
-```
-git clone https://github.com/cbg-ethz/V-pipe.git
-```
+   ```shell
+   git clone https://github.com/cbg-ethz/V-pipe.git
+   ```
 2. Go into the directory of the benchmarking study for the local haplotype reconstruction:
-```
-cd V-pipe/resources/auxiliary_workflows/benchmark/resources/local_haplo_setup
-```
+   ```shell
+   cd V-pipe/resources/auxiliary_workflows/benchmark/resources/local_haplo_setup
+   ```
 3. The parameters to reproduce the synthetic datasets are in the parameter files `config_xxx/params.csv` with the configuration file `config_xxx/config.yaml` where simulation mode, replicate number and methods to be executed are defined.   
 4. The methods to execute must be define in a Python script in this directory:  
-```
-V-pipe/resources/auxiliary_workflows/benchmark/resources/method_definitions.
-```
+   ```shell
+   V-pipe/resources/auxiliary_workflows/benchmark/resources/method_definitions.
+   ```
 5. Now the workflow is ready, go back to the directory  
-```
-cd V-pipe/resources/auxiliary_workflows/benchmark/resources/local_haplo_setup
-```
+   ```shell
+   cd V-pipe/resources/auxiliary_workflows/benchmark/resources/local_haplo_setup
+   ```
 6. To install the needed Conda environments execute:
-```
-snakemake --conda-create-envs-only --use-conda -c1
-```
+   ```shell
+   snakemake --conda-create-envs-only --use-conda -c1
+   ```
 7. To submit the workflow to a lsf-cluster execute `./run_workflow.sh`, otherwise execute the workflow with `snakemake --use-conda -c1`  
 8. The workflow will provide the results in the directory `results`.   
 9. When the workflow has terminated and all result files were generated, figures from the manuscript can be generated by executing the notebooks in `workflow/notebooks/`.  
diff --git a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_indels_freqs.py b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_indels_freqs.py
index 4aec704a..0646391a 100644
--- a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_indels_freqs.py
+++ b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_indels_freqs.py
@@ -27,10 +27,12 @@ def convert_vcf(fname):
             variant_list.add(f"{zero_based_pos}{base}")
     return variant_list
 
+
 def convert_groundtruth(fname):
     df = pd.read_csv(fname, index_col=0)
     return set((df["position"].astype(str) + df["variant"]).tolist())
 
+
 def mutation_calls_details(vcf_list, groundtruth_list):
     # compute performance
     tmp = []
@@ -45,11 +47,16 @@ def mutation_calls_details(vcf_list, groundtruth_list):
         predicted_variants = convert_vcf(fname_vcf)
 
         # iter through ground truth mutations
-        set_true_variants = [str(gt_row["position"]) + gt_row["variant"] for iter_row, gt_row in pd.read_csv(fname_groundtruth, index_col=0).iterrows()]
+        set_true_variants = [
+            str(gt_row["position"]) + gt_row["variant"]
+            for iter_row, gt_row in pd.read_csv(
+                fname_groundtruth, index_col=0
+            ).iterrows()
+        ]
         for iter_row, gt_row in pd.read_csv(fname_groundtruth, index_col=0).iterrows():
             true_variant = str(gt_row["position"]) + gt_row["variant"]
             mutation_type = str(gt_row["type"])
-            if method.startswith(('lofreq', 'shorah', 'viloca')):
+            if method.startswith(("lofreq", "shorah", "viloca")):
                 is_false_negative = True
                 for variant in VCF(fname_vcf):
                     for idx_base, base in enumerate(variant.ALT):
@@ -59,14 +66,20 @@ def mutation_calls_details(vcf_list, groundtruth_list):
                         if true_variant == predicted_variant:
                             is_false_negative = False
                             # get pred_freq
-                            if method.startswith('lofreq'):
+                            if method.startswith("lofreq"):
                                 posterior = 1.0
-                                freq = variant.INFO.get('AF')#vi.split(',')[idx_base]
-                            elif method.startswith(('shorah', 'viloca')):
+                                freq = variant.INFO.get("AF")  # vi.split(',')[idx_base]
+                            elif method.startswith(("shorah", "viloca")):
                                 if mutation_type != "insertion":
-                                    freq = (int(variant.INFO.get('Fvar')) + int(variant.INFO.get('Rvar'))) / (int(variant.INFO.get('Ftot')) + int(variant.INFO.get('Rtot')))
+                                    freq = (
+                                        int(variant.INFO.get("Fvar"))
+                                        + int(variant.INFO.get("Rvar"))
+                                    ) / (
+                                        int(variant.INFO.get("Ftot"))
+                                        + int(variant.INFO.get("Rtot"))
+                                    )
                                 elif mutation_type == "insertion":
-                                    freq = (float(variant.INFO.get('Freq1')))
+                                    freq = float(variant.INFO.get("Freq1"))
 
                             tmp.append(
                                 {
@@ -125,6 +138,7 @@ def main(vcf_list, groundtruth_list, fname_out):
     df = mutation_calls_details(vcf_list, groundtruth_list)
     df.to_csv(fname_out)
 
+
 if __name__ == "__main__":
     main(
         [Path(e) for e in snakemake.input.vcf_list],
diff --git a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_runtime.py b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_runtime.py
index bb8bcdf3..8cabd817 100644
--- a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_runtime.py
+++ b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_runtime.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 import pandas as pd
 
+
 def runtime(benchmark_list, fname_out):
     # gather benchmark information
     tmp = []
diff --git a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_snvs_freqs.py b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_snvs_freqs.py
index d9d0e6db..56bb4814 100644
--- a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_snvs_freqs.py
+++ b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_snvs_freqs.py
@@ -27,10 +27,12 @@ def convert_vcf(fname):
             variant_list.add(f"{zero_based_pos}{base}")
     return variant_list
 
+
 def convert_groundtruth(fname):
     df = pd.read_csv(fname, index_col=0)
     return set((df["position"].astype(str) + df["variant"]).tolist())
 
+
 def mutation_calls_details(vcf_list, groundtruth_list):
     # compute performance
     tmp = []
@@ -48,7 +50,7 @@ def mutation_calls_details(vcf_list, groundtruth_list):
         for iter_row, gt_row in pd.read_csv(fname_groundtruth, index_col=0).iterrows():
             true_variant = str(gt_row["position"]) + gt_row["variant"]
             mutation_type = str(gt_row["type"])
-            if method.startswith(('lofreq', 'shorah')):
+            if method.startswith(("lofreq", "shorah")):
                 is_false_negative = True
                 for variant in VCF(fname_vcf):
                     for idx_base, base in enumerate(variant.ALT):
@@ -58,12 +60,18 @@ def mutation_calls_details(vcf_list, groundtruth_list):
                         if true_variant == predicted_variant:
                             is_false_negative = False
                             # get pred_freq
-                            if method.startswith('lofreq'):
+                            if method.startswith("lofreq"):
                                 posterior = 1.0
-                                freq = variant.INFO.get('AF')#vi.split(',')[idx_base]
-                            elif method.startswith('shorah'):
+                                freq = variant.INFO.get("AF")  # vi.split(',')[idx_base]
+                            elif method.startswith("shorah"):
                                 posterior = 1.0
-                                freq = (int(variant.INFO.get('Fvar')) + int(variant.INFO.get('Rvar'))) / (int(variant.INFO.get('Ftot')) + int(variant.INFO.get('Rtot')))
+                                freq = (
+                                    int(variant.INFO.get("Fvar"))
+                                    + int(variant.INFO.get("Rvar"))
+                                ) / (
+                                    int(variant.INFO.get("Ftot"))
+                                    + int(variant.INFO.get("Rtot"))
+                                )
 
                             tmp.append(
                                 {
@@ -99,23 +107,41 @@ def mutation_calls_details(vcf_list, groundtruth_list):
                         }
                     )
 
-            elif method.startswith('viloca'):
-
-                colnames=['Chromosome', 'Pos', 'Ref', 'Alt', 'Frq', 'Pst', 'Rvar', 'Fvar', 'Rtot', 'Ftot', 'Qval']
+            elif method.startswith("viloca"):
+
+                colnames = [
+                    "Chromosome",
+                    "Pos",
+                    "Ref",
+                    "Alt",
+                    "Frq",
+                    "Pst",
+                    "Rvar",
+                    "Fvar",
+                    "Rtot",
+                    "Ftot",
+                    "Qval",
+                ]
                 # we want the file SNVs_0.010000.tsv as the posterior filter was not applied here
-                fname_SNVs_correct = str(fname_vcf).split("snvs_.vcf")[0] + "work/snv/SNVs_0.010000.tsv"
-                df_predicted = pd.read_csv(fname_SNVs_correct, names=colnames, header=None ,sep="\t")
-                df_predicted['Pst'] = pd.to_numeric(df_predicted['Pst'], errors='coerce')
+                fname_SNVs_correct = (
+                    str(fname_vcf).split("snvs_.vcf")[0] + "work/snv/SNVs_0.010000.tsv"
+                )
+                df_predicted = pd.read_csv(
+                    fname_SNVs_correct, names=colnames, header=None, sep="\t"
+                )
+                df_predicted["Pst"] = pd.to_numeric(
+                    df_predicted["Pst"], errors="coerce"
+                )
 
                 for iter_row_pred, pred_row in df_predicted.iterrows():
-                    zero_based_pos = int(pred_row['Pos']) - 1  # VCF is 1-based
+                    zero_based_pos = int(pred_row["Pos"]) - 1  # VCF is 1-based
                     is_false_negative = True
                     predicted_variant = f"{zero_based_pos}{pred_row['Alt']}"
 
                     if true_variant == predicted_variant:
                         is_false_negative = False
-                        posterior = pred_row['Pst']
-                        freq = pred_row['Frq']
+                        posterior = pred_row["Pst"]
+                        freq = pred_row["Frq"]
 
                         tmp.append(
                             {
@@ -158,6 +184,7 @@ def main(vcf_list, groundtruth_list, fname_out):
     df = mutation_calls_details(vcf_list, groundtruth_list)
     df.to_csv(fname_out)
 
+
 if __name__ == "__main__":
     main(
         [Path(e) for e in snakemake.input.vcf_list],
diff --git a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_snvs_viloca.py b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_snvs_viloca.py
index f96a8777..a606fa04 100644
--- a/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_snvs_viloca.py
+++ b/resources/auxiliary_workflows/benchmark/resources/local_haplotype_setup/workflow/scripts/performance_measures_snvs_viloca.py
@@ -18,11 +18,12 @@ def convert_vcf(df):
     variant_list = set()
 
     for idx_row, variant in df.iterrows():
-        for base in variant['Alt']:
-            zero_based_pos = int(variant['Pos']) - 1  # VCF is 1-based
+        for base in variant["Alt"]:
+            zero_based_pos = int(variant["Pos"]) - 1  # VCF is 1-based
             variant_list.add(f"{zero_based_pos}{base}")
     return variant_list
 
+
 def convert_groundtruth(fname):
     df = pd.read_csv(fname, index_col=0)
     return set((df["position"].astype(str) + df["variant"]).tolist())
@@ -76,20 +77,38 @@ def compute_performance(true_variants, predicted_variants):
 
 def performance_plots(vcf_list, groundtruth_list, posterior_threshold):
     # compute performance
-    colnames=['Chromosome', 'Pos', 'Ref', 'Alt', 'Frq', 'Pst', 'Rvar', 'Fvar', 'Rtot', 'Ftot', 'Qval']
+    colnames = [
+        "Chromosome",
+        "Pos",
+        "Ref",
+        "Alt",
+        "Frq",
+        "Pst",
+        "Rvar",
+        "Fvar",
+        "Rtot",
+        "Ftot",
+        "Qval",
+    ]
     tmp = []
     fps_tmp = []
     for fname_vcf, fname_groundtruth in zip(vcf_list, groundtruth_list):
 
         # we want the file SNVs_0.010000.tsv as the posterior filter was not applied here
-        fname_SNVs_correct = str(fname_vcf).split("snvs_.vcf")[0] + "work/snv/SNVs_0.010000.tsv"
+        fname_SNVs_correct = (
+            str(fname_vcf).split("snvs_.vcf")[0] + "work/snv/SNVs_0.010000.tsv"
+        )
         if os.path.isfile(fname_SNVs_correct):
-            df_predicted = pd.read_csv(fname_SNVs_correct, names=colnames, header=None ,sep="\t")
+            df_predicted = pd.read_csv(
+                fname_SNVs_correct, names=colnames, header=None, sep="\t"
+            )
         else:
-            fname_SNVs_correct = str(fname_vcf).split("snvs_.vcf")[0] + "work/snv/SNVs_0.010000_final.csv"
+            fname_SNVs_correct = (
+                str(fname_vcf).split("snvs_.vcf")[0]
+                + "work/snv/SNVs_0.010000_final.csv"
+            )
             df_predicted = pd.read_csv(fname_SNVs_correct)
-            df_predicted = df_predicted.rename(columns={'Var': 'Alt',
-                                                        'Pst2': 'Pst'})
+            df_predicted = df_predicted.rename(columns={"Var": "Alt", "Pst2": "Pst"})
 
         parts = str(fname_vcf).split("/")
 
@@ -98,11 +117,10 @@ def performance_plots(vcf_list, groundtruth_list, posterior_threshold):
         elif len(parts) == 8:  # for multi workflow
             _, _, _, params, method, _, replicate, _ = parts
 
-
-        #filter dataframe according to posterior_threshold
-        df_predicted['Pst'] = pd.to_numeric(df_predicted['Pst'], errors='coerce')
-        df_predicted = df_predicted.dropna(subset=['Pst'])
-        df_predicted = df_predicted[df_predicted['Pst']>posterior_threshold]
+        # filter dataframe according to posterior_threshold
+        df_predicted["Pst"] = pd.to_numeric(df_predicted["Pst"], errors="coerce")
+        df_predicted = df_predicted.dropna(subset=["Pst"])
+        df_predicted = df_predicted[df_predicted["Pst"] > posterior_threshold]
 
         true_variants = convert_groundtruth(fname_groundtruth)
         predicted_variants = convert_vcf(df_predicted)
@@ -143,7 +161,6 @@ def performance_plots(vcf_list, groundtruth_list, posterior_threshold):
     return df_perf
 
 
-
 def main(vcf_list, groundtruth_list, fname_out):
 
     posterior_thresholds = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]
@@ -152,15 +169,14 @@ def main(vcf_list, groundtruth_list, fname_out):
     for posterior_threshold in posterior_thresholds:
 
         df_pos_thres = performance_plots(
-                                vcf_list,
-                                groundtruth_list,
-                                posterior_threshold
-                                )
+            vcf_list, groundtruth_list, posterior_threshold
+        )
 
         dfs_tmp.append(df_pos_thres)
 
     pd.concat(dfs_tmp).to_csv(fname_out)
 
+
 if __name__ == "__main__":
     main(
         [Path(e) for e in snakemake.input.vcf_list],
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo.py
index 356c6af9..0fac5b6a 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo.py
@@ -27,7 +27,7 @@ def main(
         check=True,
     )
 
-    runtime_limit = 15*24*60*60 - 60*60  # 15 days - 1h
+    runtime_limit = 15 * 24 * 60 * 60 - 60 * 60  # 15 days - 1h
     cliquesnv_mode = None
     if seq_type == "illumina":
         cliquesnv_mode = "snv-illumina"
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.001.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.001.py
index 356c6af9..0fac5b6a 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.001.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.001.py
@@ -27,7 +27,7 @@ def main(
         check=True,
     )
 
-    runtime_limit = 15*24*60*60 - 60*60  # 15 days - 1h
+    runtime_limit = 15 * 24 * 60 * 60 - 60 * 60  # 15 days - 1h
     cliquesnv_mode = None
     if seq_type == "illumina":
         cliquesnv_mode = "snv-illumina"
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.01.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.01.py
index 52d91474..93e8e202 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.01.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.01.py
@@ -30,24 +30,24 @@ def main(
     else:
         # prepare environment
         subprocess.run(
-                ["samtools", "view", "-h", "-o", dname_work / "reads.sam", fname_bam],
-                check=True,
+            ["samtools", "view", "-h", "-o", dname_work / "reads.sam", fname_bam],
+            check=True,
         )
 
-        runtime_limit = 15*24*60*60 - 60*60  # 15 days - 1h
+        runtime_limit = 15 * 24 * 60 * 60 - 60 * 60  # 15 days - 1h
         cliquesnv_mode = None
         if seq_type == "illumina":
-                cliquesnv_mode = "snv-illumina"
+            cliquesnv_mode = "snv-illumina"
         elif seq_type == "pacbio":
-                cliquesnv_mode = "snv-pacbio"
+            cliquesnv_mode = "snv-pacbio"
         elif seq_type == "nanopore":
-                cliquesnv_mode = "snv-pacbio"
+            cliquesnv_mode = "snv-pacbio"
         else:
-                raise RuntimeError(f"Invalid sequence technology: {seq_type}")
+            raise RuntimeError(f"Invalid sequence technology: {seq_type}")
 
         # execute tool
         subprocess.run(
-                [
+            [
                 "cliquesnv",
                 "-m",
                 cliquesnv_mode,
@@ -61,27 +61,27 @@ def main(
                 str(runtime_limit),  # 119h*60*60
                 "-threads",
                 str(threads),
-                "-Xmx50G", # -Xmx50G
-                ],
-                check=True,
+                "-Xmx50G",  # -Xmx50G
+            ],
+            check=True,
         )
         fname_cliquesnv = dname_work / "output" / "reads.fasta"
 
         # fix frequency information
         if fname_cliquesnv.exists():
-                record_list = []
-                for record in SeqIO.parse(fname_cliquesnv, "fasta"):
-                        _, _, freq = record.id.split("_")
-                        record.description = f"freq:{freq}"
+            record_list = []
+            for record in SeqIO.parse(fname_cliquesnv, "fasta"):
+                _, _, freq = record.id.split("_")
+                record.description = f"freq:{freq}"
 
-                        seq_nodel = str(record.seq).replace("-", "")
-                        record.seq = Seq(seq_nodel)
+                seq_nodel = str(record.seq).replace("-", "")
+                record.seq = Seq(seq_nodel)
 
-                        record_list.append(record)
-    #else:
+                record_list.append(record)
+            # else:
 
-    #    record_list = []
-                SeqIO.write(record_list, fname_result_haplos, "fasta")
+            #    record_list = []
+            SeqIO.write(record_list, fname_result_haplos, "fasta")
 
         # create empty vcf files
         f = open(fname_results_snv, "a")
@@ -98,4 +98,4 @@ def main(
         Path(snakemake.output.dname_work),
         snakemake.wildcards.seq_tech,
         snakemake.threads,
-        )
+    )
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.1.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.1.py
index daea809d..c00c16e1 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.1.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/cliquesnv_local_haplo_tf0.1.py
@@ -31,24 +31,24 @@ def main(
     else:
         # prepare environment
         subprocess.run(
-                ["samtools", "view", "-h", "-o", dname_work / "reads.sam", fname_bam],
-                check=True,
+            ["samtools", "view", "-h", "-o", dname_work / "reads.sam", fname_bam],
+            check=True,
         )
 
-        runtime_limit = 15*24*60*60 - 60*60  # 15 days - 1h
+        runtime_limit = 15 * 24 * 60 * 60 - 60 * 60  # 15 days - 1h
         cliquesnv_mode = None
         if seq_type == "illumina":
-                cliquesnv_mode = "snv-illumina"
+            cliquesnv_mode = "snv-illumina"
         elif seq_type == "pacbio":
-                cliquesnv_mode = "snv-pacbio"
+            cliquesnv_mode = "snv-pacbio"
         elif seq_type == "nanopore":
-                cliquesnv_mode = "snv-pacbio"
+            cliquesnv_mode = "snv-pacbio"
         else:
-                raise RuntimeError(f"Invalid sequence technology: {seq_type}")
+            raise RuntimeError(f"Invalid sequence technology: {seq_type}")
 
         # execute tool
         subprocess.run(
-                [
+            [
                 "cliquesnv",
                 "-m",
                 cliquesnv_mode,
@@ -62,27 +62,27 @@ def main(
                 str(runtime_limit),  # 119h*60*60
                 "-threads",
                 str(threads),
-                "-Xmx50G", # -Xmx50G
-                ],
-                check=True,
+                "-Xmx50G",  # -Xmx50G
+            ],
+            check=True,
         )
         fname_cliquesnv = dname_work / "output" / "reads.fasta"
 
         # fix frequency information
         if fname_cliquesnv.exists():
-                record_list = []
-                for record in SeqIO.parse(fname_cliquesnv, "fasta"):
-                        _, _, freq = record.id.split("_")
-                        record.description = f"freq:{freq}"
+            record_list = []
+            for record in SeqIO.parse(fname_cliquesnv, "fasta"):
+                _, _, freq = record.id.split("_")
+                record.description = f"freq:{freq}"
 
-                        seq_nodel = str(record.seq).replace("-", "")
-                        record.seq = Seq(seq_nodel)
+                seq_nodel = str(record.seq).replace("-", "")
+                record.seq = Seq(seq_nodel)
 
-                        record_list.append(record)
-    #else:
+                record_list.append(record)
+            # else:
 
-    #    record_list = []
-                SeqIO.write(record_list, fname_result_haplos, "fasta")
+            #    record_list = []
+            SeqIO.write(record_list, fname_result_haplos, "fasta")
 
         # create empty vcf files
         f = open(fname_results_snv, "a")
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo_local_haplo.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo_local_haplo.py
index 15c5f885..0fae97ee 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo_local_haplo.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo_local_haplo.py
@@ -30,49 +30,54 @@
 from Bio.SeqRecord import SeqRecord
 
 
-def write_config_file(fname_ref, fname_sam,fname_config, ph_prefix, dname_work, fname_result_haplos):
+def write_config_file(
+    fname_ref, fname_sam, fname_config, ph_prefix, dname_work, fname_result_haplos
+):
 
     read_length = str(fname_sam).split("read_length~")[1].split("__")[0]
     if read_length == "Ten_strain_IAV":
         read_length = 2300
         fname_output_haplos = "predicthaplo_output_global_1_2263.fas"
 
-    with open((dname_work / "config_5V").resolve(), 'r') as file:
+    with open((dname_work / "config_5V").resolve(), "r") as file:
         content = file.readlines()
 
-    modified_content =  content
-    modified_content[2] = str(ph_prefix)+'\n'
-    modified_content[4] = str(fname_ref)+'\n'
-    modified_content[6] = "0"+'\n'
-    modified_content[8] = str(fname_sam)+'\n'
-    modified_content[10] = "0"+'\n'
-    modified_content[12] = str(fname_result_haplos)+'\n'
-    modified_content[16] = "10000"+'\n'
-    modified_content[20] = "1"+'\n'
-    modified_content[22] = str(int(read_length)+1)+'\n'
+    modified_content = content
+    modified_content[2] = str(ph_prefix) + "\n"
+    modified_content[4] = str(fname_ref) + "\n"
+    modified_content[6] = "0" + "\n"
+    modified_content[8] = str(fname_sam) + "\n"
+    modified_content[10] = "0" + "\n"
+    modified_content[12] = str(fname_result_haplos) + "\n"
+    modified_content[16] = "10000" + "\n"
+    modified_content[20] = "1" + "\n"
+    modified_content[22] = str(int(read_length) + 1) + "\n"
 
     # the following parameters are set such that we don't get the following error:
     # died with <Signals.SIGFPE: 8>.
-    modified_content[30] = "-1"+'\n'
-    modified_content[34] = "0.1"+'\n'
-    modified_content[18] = "0.0001"+'\n'
-    modified_content[26] = "1"+'\n'
+    modified_content[30] = "-1" + "\n"
+    modified_content[34] = "0.1" + "\n"
+    modified_content[18] = "0.0001" + "\n"
+    modified_content[26] = "1" + "\n"
 
-    with open(fname_config, 'w') as file:
+    with open(fname_config, "w") as file:
         file.writelines(modified_content)
 
 
-def main(fname_bam,
-         fname_reference,
-         fname_results_snv,
-         fname_result_haplos,
-         dname_work,
-         seq_type,
-         threads,
+def main(
+    fname_bam,
+    fname_reference,
+    fname_results_snv,
+    fname_result_haplos,
+    dname_work,
+    seq_type,
+    threads,
 ):
     dname_work.mkdir(parents=True, exist_ok=True)
 
-    predicthaplo_exe = "/cluster/work/bewi/members/lfuhrmann/PredictHaplo-1.1/PredictHaplo"
+    predicthaplo_exe = (
+        "/cluster/work/bewi/members/lfuhrmann/PredictHaplo-1.1/PredictHaplo"
+    )
 
     # prepare environment
     subprocess.run(
@@ -84,7 +89,10 @@ def main(fname_bam,
 
     # copy example config file
     subprocess.run(
-        ["wget", "https://raw.githubusercontent.com/bmda-unibas/PredictHaplo/master/config_5V"],
+        [
+            "wget",
+            "https://raw.githubusercontent.com/bmda-unibas/PredictHaplo/master/config_5V",
+        ],
         cwd=dname_work,
         check=True,
     )
@@ -92,7 +100,14 @@ def main(fname_bam,
     # modify config file for predicthaplo
     fname_config = (dname_work / "predicthaplo_config").resolve()
     ph_prefix = (dname_work / "predicthaplo_output_").resolve()
-    write_config_file(Path(fname_reference).resolve(), fname_sam, fname_config, ph_prefix, dname_work, fname_result_haplos)
+    write_config_file(
+        Path(fname_reference).resolve(),
+        fname_sam,
+        fname_config,
+        ph_prefix,
+        dname_work,
+        fname_result_haplos,
+    )
 
     # execute tool
     subprocess.run(
@@ -112,8 +127,8 @@ def main(fname_bam,
 
     # clean output file
     record_list = []
-    for record in SeqIO.parse( (dname_work / fname_output_haplos).resolve(), "fasta"):
-        freq = record.description.split('Freq:')[1].split(". Overla")[0]
+    for record in SeqIO.parse((dname_work / fname_output_haplos).resolve(), "fasta"):
+        freq = record.description.split("Freq:")[1].split(". Overla")[0]
         record.description = f"freq:{freq}"
         seq_nodel = str(record.seq).replace("-", "")
         record.seq = Seq(seq_nodel)
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/shorah_default.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/shorah_default.py
index b74b3282..af280068 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/shorah_default.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/shorah_default.py
@@ -5,10 +5,12 @@
 from pathlib import Path
 
 
-def main(fname_bam, fname_reference, fname_result, fname_result_haplos, dname_work, seq_tech):
+def main(
+    fname_bam, fname_reference, fname_result, fname_result_haplos, dname_work, seq_tech
+):
     dname_work.mkdir(parents=True, exist_ok=True)
 
-    if seq_tech == 'illumina':
+    if seq_tech == "illumina":
         window_length = 162
 
         subprocess.run(
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/strainline.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/strainline.py
index b135613c..7b2829be 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/strainline.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/strainline.py
@@ -26,15 +26,15 @@ def fastq_to_fasta(input_file, output_file):
             SeqIO.write(record, f_out, "fasta")
 
 
-
-def main(fname_bam,
-         fname_reference,
-         fname_results_snv,
-         fname_result_haplos,
-         dname_work,
-         seq_type,
-         threads,):
-
+def main(
+    fname_bam,
+    fname_reference,
+    fname_results_snv,
+    fname_result_haplos,
+    dname_work,
+    seq_type,
+    threads,
+):
 
     # create empty vcf files
     f = open(fname_results_snv, "a")
@@ -58,8 +58,8 @@ def main(fname_bam,
             seq_platform = "ont"
 
         # fastq to fasta
-        fname_reads_fastq = str(fname_bam.resolve()).split("bam")[0]+"fastq"
-        fname_reads_fasta = str(fname_bam.resolve()).split("bam")[0]+"fasta"
+        fname_reads_fastq = str(fname_bam.resolve()).split("bam")[0] + "fastq"
+        fname_reads_fasta = str(fname_bam.resolve()).split("bam")[0] + "fasta"
         fastq_to_fasta(fname_reads_fastq, fname_reads_fasta)
 
         dname_work.mkdir(parents=True, exist_ok=True)
@@ -83,9 +83,6 @@ def main(fname_bam,
         # adapt haplotype output
 
 
-
-
-
 if __name__ == "__main__":
     main(
         Path(snakemake.input.fname_bam),
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_alpha_0.00001.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_alpha_0.00001.py
index 6830f7ca..6878caae 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_alpha_0.00001.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_alpha_0.00001.py
@@ -27,11 +27,19 @@ def gunzip(source_filepath, dest_filepath, block_size=65536):
             else:
                 d_file.write(block)
 
+
 def filter_snvs(fname_vcf_viloca, fname_results_snv, posterior_threshold):
     vf = pyvcf.VcfFrame.from_file(fname_vcf_viloca)
-    info_strings = '{"' + vf.df.INFO.str.split(';').str.join('","').str.replace('=','":"').str.replace("\"\",", "") + '"}'
+    info_strings = (
+        '{"'
+        + vf.df.INFO.str.split(";")
+        .str.join('","')
+        .str.replace("=", '":"')
+        .str.replace('"",', "")
+        + '"}'
+    )
     info_df = pd.json_normalize(info_strings.apply(eval))
-    filter_out = info_df["Post1"].astype(float)>posterior_threshold
+    filter_out = info_df["Post1"].astype(float) > posterior_threshold
     vf.df = vf.df[filter_out]
     vf.to_file(fname_results_snv)
 
@@ -43,7 +51,7 @@ def main(
     fname_results_snv,
     fname_result_haplos,
     dname_work,
-    threads =1
+    threads=1,
 ):
     genome_size = str(fname_bam).split("genome_size~")[1].split("__coverage")[0]
     alpha = 0.00001
@@ -115,8 +123,14 @@ def main(
     # here are all snvs regardless their posterior: (dname_work / "snv" / "SNVs_0.010000_final.vcf")
 
     # filter out snvs with low posterior, threshold = 0.9 as default in shorah
-    fname_vcf_viloca = str(Path(dname_work / "snv" / "SNVs_0.010000_final.vcf").resolve())
-    filter_snvs(fname_vcf_viloca, str(Path(fname_results_snv).resolve()), posterior_threshold = 0.9)
+    fname_vcf_viloca = str(
+        Path(dname_work / "snv" / "SNVs_0.010000_final.vcf").resolve()
+    )
+    filter_snvs(
+        fname_vcf_viloca,
+        str(Path(fname_results_snv).resolve()),
+        posterior_threshold=0.9,
+    )
 
     mypath = (dname_work / "support").resolve()
     onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
@@ -134,7 +148,9 @@ def main(
 
             elif file.endswith(".fas"):
                 fname_haplos = (dname_work / "support" / onlyfiles[0]).resolve()
-                shutil.copy((dname_work / "support" / file).resolve(), fname_result_haplos)
+                shutil.copy(
+                    (dname_work / "support" / file).resolve(), fname_result_haplos
+                )
 
     # fix frequency information
 
@@ -142,7 +158,9 @@ def main(
     post_list = []
     for record in SeqIO.parse(fname_result_haplos, "fasta"):
         freq_list.append(float(record.description.split("ave_reads=")[-1]))
-        post_list.append(float(record.description.split("posterior=")[-1].split(" ave_")[0]))
+        post_list.append(
+            float(record.description.split("posterior=")[-1].split(" ave_")[0])
+        )
     norm_freq_list = [float(i) / sum(freq_list) for i in freq_list]
 
     record_list = []
@@ -161,4 +179,3 @@ def main(
         Path(snakemake.output.fname_result_haplos),
         Path(snakemake.output.dname_work),
     )
-                                                                                                                                                                                                                                                                                      165,1         Bot
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_envp.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_envp.py
index 35288af3..6f8ac848 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_envp.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_envp.py
@@ -37,11 +37,11 @@ def main(
     n_mfa_starts = 1
     win_min_ext = 0.85
 
-    #coverage = str(fname_bam).split("coverage~")[1].split("__")[0]
-    #if float(coverage) > 200:
+    # coverage = str(fname_bam).split("coverage~")[1].split("__")[0]
+    # if float(coverage) > 200:
     #    exclude_non_var_pos_threshold = 2 / float(coverage)
-    #else:
-   #     exclude_non_var_pos_threshold = 1 / float(coverage)
+    # else:
+    #     exclude_non_var_pos_threshold = 1 / float(coverage)
 
     exclude_non_var_pos_threshold = 0.01
     read_length = str(fname_bam).split("read_length~")[1].split("__")[0]
diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_envp_0.001.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_envp_0.001.py
index 77058b9a..45ef81e9 100644
--- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_envp_0.001.py
+++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/viloca_envp_0.001.py
@@ -37,11 +37,11 @@ def main(
     n_mfa_starts = 1
     win_min_ext = 0.85
 
-    #coverage = str(fname_bam).split("coverage~")[1].split("__")[0]
-    #if float(coverage) > 200:
+    # coverage = str(fname_bam).split("coverage~")[1].split("__")[0]
+    # if float(coverage) > 200:
     #    exclude_non_var_pos_threshold = 2 / float(coverage)
-    #else:
-   #     exclude_non_var_pos_threshold = 1 / float(coverage)
+    # else:
+    #     exclude_non_var_pos_threshold = 1 / float(coverage)
 
     exclude_non_var_pos_threshold = 0.001
     read_length = str(fname_bam).split("read_length~")[1].split("__")[0]
@@ -106,7 +106,6 @@ def main(
 
     (dname_work / "snv" / "SNVs_0.010000_final.vcf").rename(fname_results_snv)
 
-
     mypath = (dname_work / "support").resolve()
     onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
     print("onlyfiles", onlyfiles)
@@ -148,4 +147,3 @@ def main(
         Path(snakemake.output.fname_result_haplos),
         Path(snakemake.output.dname_work),
     )
-                                                                                                                                                                                                                                                                                      149,1         Bot
diff --git a/resources/auxiliary_workflows/benchmark/workflow/scripts/bed_file_single_amplicon.py b/resources/auxiliary_workflows/benchmark/workflow/scripts/bed_file_single_amplicon.py
index 187c9192..9ee48f85 100644
--- a/resources/auxiliary_workflows/benchmark/workflow/scripts/bed_file_single_amplicon.py
+++ b/resources/auxiliary_workflows/benchmark/workflow/scripts/bed_file_single_amplicon.py
@@ -10,7 +10,7 @@ def main(fname_reference, fname_insert_bed, genome_size):
         f.write(
             str(seq_id)
             + "  1       "
-            + str(int(genome_size)+1)
+            + str(int(genome_size) + 1)
             + "     scheme_INSERT_1 1       +"
         )
 
diff --git a/resources/auxiliary_workflows/benchmark/workflow/scripts/generate_haplotypes.py b/resources/auxiliary_workflows/benchmark/workflow/scripts/generate_haplotypes.py
index f5169512..27d361b2 100644
--- a/resources/auxiliary_workflows/benchmark/workflow/scripts/generate_haplotypes.py
+++ b/resources/auxiliary_workflows/benchmark/workflow/scripts/generate_haplotypes.py
@@ -48,27 +48,43 @@ def generate_haplotype(seq_master, mutation_rate=0, insertion_rate=0, deletion_r
     deletion_count = int(len(seq_haplotype) * deletion_rate)
     insertion_count = int(len(seq_haplotype) * insertion_rate)
     position_list = np.random.choice(
-        np.asarray([number for number in np.arange(len(seq_haplotype)) if not number in position_list_muts ]),
-        size=deletion_count+insertion_count,
-        replace=False
+        np.asarray(
+            [
+                number
+                for number in np.arange(len(seq_haplotype))
+                if not number in position_list_muts
+            ]
+        ),
+        size=deletion_count + insertion_count,
+        replace=False,
     )
     position_list = sorted(position_list, reverse=True)
 
     # insertions
     seq_haplotype = list(seq_haplotype)
     insertion_positions = position_list[:insertion_count]
-    insertion_list = np.random.choice(BASE_LIST, size=(insertion_count,3))
+    insertion_list = np.random.choice(BASE_LIST, size=(insertion_count, 3))
     insertion_list = np.asarray(["".join(aa) for aa in insertion_list])
-    for count, (insert_pos, insert) in enumerate(zip(insertion_positions, insertion_list)):
+    for count, (insert_pos, insert) in enumerate(
+        zip(insertion_positions, insertion_list)
+    ):
         # count is 1- based
         # Insert values along the given axis before the given indices.
-        seq_haplotype.insert(insert_pos+count,insert)
+        seq_haplotype.insert(insert_pos + count, insert)
 
     ground_truth["type"].extend(["insertion"] * insertion_count)
-    ground_truth["position"].extend([pos -1 for pos in insertion_positions]) # 0-based
-    ground_truth["variant"].extend([ref+codon for (ref, codon) in zip(seq_master[[pos -1 for pos in insertion_positions]], insertion_list)])
-    ground_truth["reference"].extend(seq_master[[pos -1 for pos in insertion_positions]])
-
+    ground_truth["position"].extend([pos - 1 for pos in insertion_positions])  # 0-based
+    ground_truth["variant"].extend(
+        [
+            ref + codon
+            for (ref, codon) in zip(
+                seq_master[[pos - 1 for pos in insertion_positions]], insertion_list
+            )
+        ]
+    )
+    ground_truth["reference"].extend(
+        seq_master[[pos - 1 for pos in insertion_positions]]
+    )
 
     # deletions
     seq_haplotype = np.asarray(seq_haplotype)
@@ -76,11 +92,17 @@ def generate_haplotype(seq_master, mutation_rate=0, insertion_rate=0, deletion_r
 
     for delection_pos in deletion_positions:
         # Return a new array with sub-arrays along an axis deleted. For a one dimensional array, this returns those entries not returned by arr[obj].
-        seq_haplotype = np.delete(seq_haplotype, [delection_pos, delection_pos+1, delection_pos+2])
+        seq_haplotype = np.delete(
+            seq_haplotype, [delection_pos, delection_pos + 1, delection_pos + 2]
+        )
         ground_truth["type"].extend(["deletion"])
-        ground_truth["position"].extend([delection_pos-2])
-        ground_truth["variant"].extend([seq_master[delection_pos-2]]) #delection_pos ?
-        ground_truth["reference"].extend(["".join(seq_master[delection_pos-2:delection_pos+2])])
+        ground_truth["position"].extend([delection_pos - 2])
+        ground_truth["variant"].extend(
+            [seq_master[delection_pos - 2]]
+        )  # delection_pos ?
+        ground_truth["reference"].extend(
+            ["".join(seq_master[delection_pos - 2 : delection_pos + 2])]
+        )
 
     return "".join(seq_haplotype), pd.DataFrame(ground_truth)
 
diff --git a/resources/auxiliary_workflows/benchmark/workflow/scripts/performance_measures_global.py b/resources/auxiliary_workflows/benchmark/workflow/scripts/performance_measures_global.py
index 4f1382f9..e0765739 100644
--- a/resources/auxiliary_workflows/benchmark/workflow/scripts/performance_measures_global.py
+++ b/resources/auxiliary_workflows/benchmark/workflow/scripts/performance_measures_global.py
@@ -44,7 +44,6 @@ def read_fasta_files(fasta_files, with_method=True):
             else:
                 posterior = np.nan
 
-
             if freq is None:
                 freq = props.get("Freq")
 
diff --git a/resources/auxiliary_workflows/benchmark/workflow/scripts/performance_measures_local.py b/resources/auxiliary_workflows/benchmark/workflow/scripts/performance_measures_local.py
index 0e30234d..0efbe50c 100644
--- a/resources/auxiliary_workflows/benchmark/workflow/scripts/performance_measures_local.py
+++ b/resources/auxiliary_workflows/benchmark/workflow/scripts/performance_measures_local.py
@@ -20,7 +20,7 @@ def convert_vcf(fname):
         vcf = VCF(fname)
     except OSError:
         return set()
-        
+
     for variant in VCF(fname):
         for base in variant.ALT:
             zero_based_pos = variant.POS - 1  # VCF is 1-based
diff --git a/resources/auxiliary_workflows/benchmark/workflow/scripts/shotgun_simulation.py b/resources/auxiliary_workflows/benchmark/workflow/scripts/shotgun_simulation.py
index f681c7f8..82c20c6f 100644
--- a/resources/auxiliary_workflows/benchmark/workflow/scripts/shotgun_simulation.py
+++ b/resources/auxiliary_workflows/benchmark/workflow/scripts/shotgun_simulation.py
@@ -18,7 +18,7 @@ def simulate_illumina(fname_haplotype, coverage_haplotype, read_length, art_pref
     subprocess.run(
         [
             "art_illumina",
-            "-sam", # this alignment fails when we simulate indels in the haplotypes
+            "-sam",  # this alignment fails when we simulate indels in the haplotypes
             "--paired",
             "-m",
             str(
@@ -285,25 +285,36 @@ def main(
         insertion_rate = float(params["haplos"].split("@")[-3])
         # bwa is not able to align the simulated indels correctly.vim
 
-        if (insertion_rate>0) or (deletion_rate>0):
-                subprocess.run(["rm", fname_bam])
-                # align reads
-                subprocess.run(
-                    [
-                        "minimap2",
-                        "-ax",
-                        "sr",
-                        "--secondary=no",
-                        str(dname_work.resolve())[:-4] +"reference.fasta",
-                        str(fname_fastq.resolve()),
-                        "-o",
-                        str(art_prefix) + ".sam",
-                    ]
-                )
-
-                subprocess.run(["samtools", "sort", str(art_prefix) + ".sam", "-o", str(art_prefix) + ".sam"])
-
-                subprocess.run(["samtools", "view", "-bS", str(art_prefix) + ".sam", "-o", fname_bam])
+        if (insertion_rate > 0) or (deletion_rate > 0):
+            subprocess.run(["rm", fname_bam])
+            # align reads
+            subprocess.run(
+                [
+                    "minimap2",
+                    "-ax",
+                    "sr",
+                    "--secondary=no",
+                    str(dname_work.resolve())[:-4] + "reference.fasta",
+                    str(fname_fastq.resolve()),
+                    "-o",
+                    str(art_prefix) + ".sam",
+                ]
+            )
+
+            subprocess.run(
+                [
+                    "samtools",
+                    "sort",
+                    str(art_prefix) + ".sam",
+                    "-o",
+                    str(art_prefix) + ".sam",
+                ]
+            )
+
+            subprocess.run(
+                ["samtools", "view", "-bS", str(art_prefix) + ".sam", "-o", fname_bam]
+            )
+
 
 if __name__ == "__main__":
     main(
diff --git a/workflow/scripts/file_parser.py b/workflow/scripts/file_parser.py
index 2acbb904..27360ab0 100755
--- a/workflow/scripts/file_parser.py
+++ b/workflow/scripts/file_parser.py
@@ -202,7 +202,7 @@ def main():
     datefmt = None
     if args.config_file:
         with open(args.config_file, "r") as stream:
-            yaml = ruamel.yaml.YAML(typ='rt')
+            yaml = ruamel.yaml.YAML(typ="rt")
             reg_data = yaml.load(stream)
         if "sample" in reg_data:
             rxsam = regex.compile(reg_data.get("sample"))