diff --git a/cutseq/run.py b/cutseq/run.py index fcd546e..f5a9ecf 100644 --- a/cutseq/run.py +++ b/cutseq/run.py @@ -222,14 +222,15 @@ def __init__(self): BUILDIN_ADAPTERS = { - # dsDNA ligation, A tailing method, do ot need to trim - "DSLIGATION": "AGTTCTACAGTCCGACGATCT>AGATCGGAAGAGCACACGTC", + ## RNA library # Small RNA, double ligation method, without barcode # p5 - insert - p7 # (Optional) trim 2nt on both end to increase quality - "SMALLRNA": "CACGACGCTCTTCCGATCT>AGATCGGAAGAGCACACGTC", + # NOTE: i5 is small RNA adapter, which is different from truseq i5 + "SMALLRNA": "AGTTCTACAGTCCGACGATC>AGATCGGAAGAGCACACGTC", # inline barcode method # ref: https://www.nature.com/articles/nmeth0318-226c/figures/1 + # NOTE: i5 is small RNA adapter, which is different from truseq i5 "INLINE": "AGTTCTACAGTCCGACGATCNNNNN>NNNNN(ATCACG)AGATCGGAAGAGCACACGTC", # p5 - (random rt tail in TSO) - reverse insert - (random primer start?) - p7 "TAKARAV2": "ACACGACGCTCTTCCGATCTXXXXXNNNNNNNNAGATCGGAAGAGCACACGTC", + "SACSEQV3": "ACACGACGCTCTTCCGATCTNNNNNNNNX>XXNNNNNNNNAGATCGGAAGAGCACACGTC", # p5 - [might be 6bp of polyC] - reverse insert (cDNA) - adaptase tail (CCCCCC) - p7 # 6nt of polyG in 5' of R1 might from random RT primer # adaptase tail can be as long as 15bp at the 5' of R2 of polyG) # no UMI, but try to use random polyC tail as UMI # legacy name: "SWIFT" "XGENRNA": "ACACGACGCTCTTCCGATCTXXXXXXAGATCGGAAGAGCACACGTC", + # The general method for xGen / Swift kit, might be better than hard clip, TODO + # '-a "C{20};e=0.5;o=1" -G "G{20};e=0.5;o=1"' might be better + # "xGenDNA": "ACACGACGCTCTTCCGATCTXXX>(CCCCCCCCCCCCCCCCCCCC;noninternal;e=0.5;o=1)AGATCGGAAGAGCACACGTC", # https://www.idtdna.com/pages/products/next-generation-sequencing/workflow/xgen-ngs-library-preparation/methyl-seq-dna-library-kit#product-details # https://sfvideo.blob.core.windows.net/sitefinity/docs/default-source/technical-report/tail-trimming-for-better-data-technical-note.pdf?sfvrsn=135efe07_4 # 10 bases from END of R1 
10 bases from START of R2 @@ -259,16 +270,11 @@ def __init__(self): "XGENMETHY": "ACACGACGCTCTTCCGATCTXX>XXXXXXXXXXAGATCGGAAGAGCACACGTC", # for snmC-seq, trim 15 bases "XGENSNMC": "ACACGACGCTCTTCCGATCTXXXXXX>XXXXXXXXXXXXXXXAGATCGGAAGAGCACACGTC", - # The general method for xGen / Swift kit, might be better than hard clip, TODO - # '-a "C{20};e=0.5;o=1" -G "G{20};e=0.5;o=1"' might be better - # "xGenDNA": "ACACGACGCTCTTCCGATCTXXX>(CCCCCCCCCCCCCCCCCCCC;noninternal;e=0.5;o=1)AGATCGGAAGAGCACACGTC", # PBAT: method use random primer to add both p5 and p7, # and there might be random tail at the 5' end of both reads "PBAT": "ACACGACGCTCTTCCGATCTXXXXXXCTGTCTCTTATACACATCT", - # Illumina Strand-Specific RNA-Seq Library Prep - "ILLUMINARNA": "AGATGTGTATAAGAGACAG
- AGTTCTACAGTCCGACGATCT - > + AGTTCTACAGTCCGACGATC +
+
+ > +
AGATCGGAAGAGCACACGTC
@@ -32,8 +35,11 @@ CutSeq supports various built-in adapter schemes for different NGS library prepa ### SMALLRNA (Small RNA Libraries)
- CACGACGCTCTTCCGATCT - > + AGTTCTACAGTCCGACGATC +
+
+ > +
AGATCGGAAGAGCACACGTC
@@ -48,7 +54,10 @@ CutSeq supports various built-in adapter schemes for different NGS library prepa
AGTTCTACAGTCCGACGATC NNNNN - > +
+
+ > +
NNNNN (ATCACG) AGATCGGAAGAGCACACGTC @@ -64,110 +73,12 @@ CutSeq supports various built-in adapter schemes for different NGS library prepa
ACACGACGCTCTTCCGATCT - XXX - < - XXX - AGATCGGAAGAGCACACGTC -
-
- -- Earlier version of TAKARA stranded protocol -- Includes masking for template switching artifacts -- Reverse orientation to RNA -- No UMI sequences - -### STRANDED (Generic Stranded RNA-seq) -
-
- ACACGACGCTCTTCCGATCT - X - < - XXX - AGATCGGAAGAGCACACGTC -
-
- -- Basic stranded RNA-seq protocol -- Minimal masking for ligation artifacts -- Reverse orientation -- No UMI sequences - -### TAKARAV3 (SMARTer® Stranded Total RNA-Seq Kit v3) -
-
- ACACGACGCTCTTCCGATCT - XXX - < XXXXXX - NNNNNNNN - AGATCGGAAGAGCACACGTC -
-
- -- Used for stranded RNA-seq -- Contains 8nt UMI -- Reverse orientation to RNA -- Includes masking for template switching artifacts - -### ECLIP6 (eCLIP Protocol) -
-
- ACACGACGCTCTTCCGATCT - XX - < - X - NNNNNN - AGATCGGAAGAGCACACGTC -
-
- -- Used for eCLIP and similar protocols -- Contains 6nt UMI -- Reverse orientation -- Short masking regions - -### ECLIP10 (Extended eCLIP Protocol) -
-
- ACACGACGCTCTTCCGATCT - XX - < - X - NNNNNNNNNN - AGATCGGAAGAGCACACGTC -
-
- -- Extended version of eCLIP protocol -- Contains 10nt UMI for higher complexity -- Reverse orientation -- Short masking regions - -### SACSEQV3 (SAC-seq Protocol V3) -
-
- AGTTCTACAGTCCGACGATCT - NNNNNNNN - X - > - XX - NNNNNNNN - AGATCGGAAGAGCACACGTC -
-
- -- Dual UMI design (8nt each) -- Forward orientation -- Balanced masking on both sides -- Used for high-complexity libraries - -### XGENRNA (xGen RNA Library Prep) -
-
- ACACGACGCTCTTCCGATCT +
+
+ < +
XXXXXX - < - XXXXXXXXXXXXXXX AGATCGGAAGAGCACACGTC
@@ -177,44 +88,15 @@ CutSeq supports various built-in adapter schemes for different NGS library prepa - Reverse orientation - Uses random polyC tail as pseudo-UMI -### XGENMETHY (xGen Methyl-Seq) -
-
- ACACGACGCTCTTCCGATCT - XX - > - XXXXXXXXXX - AGATCGGAAGAGCACACGTC -
-
- -- Designed for methylation sequencing -- Trims 10 bases from read ends -- Forward orientation -- Includes random primer artifact removal - -### XGENSNMC (snmC-seq Protocol) -
-
- ACACGACGCTCTTCCGATCT - XXXXXX - > - XXXXXXXXXXXXXXX - AGATCGGAAGAGCACACGTC -
-
- -- Specialized for single-nucleus methylome sequencing -- Extended 15-base trimming -- Forward orientation -- Heavy masking for protocol artifacts - ### PBAT (Post-Bisulfite Adapter Tagging)
ACACGACGCTCTTCCGATCT XXXXXX - < +
+
+ < +
XXXXXX AGATCGGAAGAGCACACGTC
@@ -229,7 +111,10 @@ CutSeq supports various built-in adapter schemes for different NGS library prepa
AGATGTGTATAAGAGACAG - > +
+
+ > +
CTGTCTCTTATACACATCT
@@ -243,7 +128,10 @@ CutSeq supports various built-in adapter schemes for different NGS library prepa
AGATGTGTATAAGAGACAG - < +
+
+ < +
CTGTCTCTTATACACATCT
@@ -256,8 +144,10 @@ CutSeq supports various built-in adapter schemes for different NGS library prepa \ No newline at end of file