From e96a63e419525529b8408bbac5d433ab55aed18b Mon Sep 17 00:00:00 2001
From: Trenton w Fleming
Date: Wed, 15 Nov 2023 00:09:47 -0500
Subject: [PATCH 1/4] Cloning refactor and data race fix (#393)

* enzyme manager to avoid managing global state over the lifetime of a program
* clone_test -> clone to make it easier to test and bench
* remove concurrency
* remove todos
* expose some enzymes
* fix lint
* update changelog and remove pointer receiver.
* moved example test to separate file for namespace clarity in rendered go doc examples.
* renamed variables and added comments.
* renamed variables and removed unnecessary control block.
* added struct field name to changelog.
* Golden Gate is no longer a receiver
* update changelog

---------

Co-authored-by: Timothy Stiles
---
 CHANGELOG.md          |   9 +-
 clone/clone.go        | 252 +++++++++++++++++++++---------------------
 clone/clone_test.go   | 173 +++++++++++++++--------------
 clone/example_test.go |  31 ++++++
 4 files changed, 255 insertions(+), 210 deletions(-)
 create mode 100644 clone/example_test.go

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2fe0c054..fc655a687 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,9 +14,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 - `fastq` parser no longer becomes de-aligned when reading (#325)
 - `fastq` now handles optionals correctly (#323)
+- No more data race in GoldenGate (#276)
+
+### Breaking
+- CutWithEnzymeByName is now a method of EnzymeManager. GoldenGate now takes an Enzyme instead of the name of an enzyme.
+This removes a dependence on package-level global state and makes it easier to manage enzymes flexibly
+over the lifetime of a program.
+- Enzyme.OverhangLen is now named Enzyme.OverhangLength

 ## [0.26.0] - 2023-07-22
 Oops, we weren't keeping a changelog before this tag!

 [unreleased]: https://github.com/TimothyStiles/poly/compare/v0.26.0...main
-[0.26.0]: https://github.com/TimothyStiles/poly/releases/tag/v0.26.0
\ No newline at end of file
+[0.26.0]: https://github.com/TimothyStiles/poly/releases/tag/v0.26.0
diff --git a/clone/clone.go b/clone/clone.go
index e2cbf41ce..cc9939ba1 100644
--- a/clone/clone.go
+++ b/clone/clone.go
@@ -46,7 +46,6 @@ import (
 	"regexp"
 	"sort"
 	"strings"
-	"sync"

 	"github.com/TimothyStiles/poly/checks"
 	"github.com/TimothyStiles/poly/seqhash"
@@ -83,15 +82,27 @@ type Enzyme struct {
 	RegexpFor       *regexp.Regexp
 	RegexpRev       *regexp.Regexp
 	Skip            int
-	OverhangLen     int
+	OverhangLength  int
 	RecognitionSite string
 }

-// Eventually, we want to get the data for this map from ftp://ftp.neb.com/pub/rebase
-var enzymeMap = map[string]Enzyme{
-	"BsaI":  {"BsaI", regexp.MustCompile("GGTCTC"), regexp.MustCompile("GAGACC"), 1, 4, "GGTCTC"},
-	"BbsI":  {"BbsI", regexp.MustCompile("GAAGAC"), regexp.MustCompile("GTCTTC"), 2, 4, "GAAGAC"},
-	"BtgZI": {"BtgZI", regexp.MustCompile("GCGATG"), regexp.MustCompile("CATCGC"), 10, 4, "GCGATG"},
+// EnzymeManager is a manager for Enzymes. It allows management of enzymes throughout the lifecycle of your
+// program. EnzymeManager is not safe for concurrent use.
+type EnzymeManager struct {
+	// enzymeMap Map of enzymes that exist for the lifetime of the manager. Not safe for concurrent use.
+	enzymeMap map[string]Enzyme
+}
+
+// NewEnzymeManager creates a new EnzymeManager given some enzymes.
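+//
+// A minimal usage sketch (illustrative only; GetBaseRestrictionEnzymes and
+// GetEnzymeByName are defined later in this file, and "BsaI" is one of the
+// base enzymes they provide):
+//
+//	enzymeManager := NewEnzymeManager(GetBaseRestrictionEnzymes())
+//	bsaI, err := enzymeManager.GetEnzymeByName("BsaI")
+//	if err != nil {
+//		// "BsaI" was not registered with this manager
+//	}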
+func NewEnzymeManager(enzymes []Enzyme) EnzymeManager { + enzymeMap := make(map[string]Enzyme) + for enzymeIndex := range enzymes { + enzymeMap[enzymes[enzymeIndex].Name] = enzymes[enzymeIndex] + } + + return EnzymeManager{ + enzymeMap: enzymeMap, + } } /****************************************************************************** @@ -100,30 +111,37 @@ Base cloning functions begin here. ******************************************************************************/ -func getBaseRestrictionEnzymes() map[string]Enzyme { - return enzymeMap -} - // CutWithEnzymeByName cuts a given sequence with an enzyme represented by the // enzyme's name. It is a convenience wrapper around CutWithEnzyme that // allows us to specify the enzyme by name. -func CutWithEnzymeByName(seq Part, directional bool, enzymeStr string) ([]Fragment, error) { - enzymeMap := getBaseRestrictionEnzymes() - if _, ok := enzymeMap[enzymeStr]; !ok { - return []Fragment{}, errors.New("Enzyme " + enzymeStr + " not found in enzymeMap") +func (enzymeManager EnzymeManager) CutWithEnzymeByName(part Part, directional bool, name string) ([]Fragment, error) { + // Get the enzyme from the enzyme map + enzyme, err := enzymeManager.GetEnzymeByName(name) + if err != nil { + // Return an error if there was an error + return []Fragment{}, err } - enzyme := enzymeMap[enzymeStr] - return CutWithEnzyme(seq, directional, enzyme), nil + // Cut the sequence with the enzyme + return CutWithEnzyme(part, directional, enzyme), nil +} + +// GetEnzymeByName gets the enzyme by it's name. If the enzyme manager does not +// contain an enzyme with the provided name, an error will be returned +func (enzymeManager EnzymeManager) GetEnzymeByName(name string) (Enzyme, error) { + if enzyme, ok := enzymeManager.enzymeMap[name]; ok { + return enzyme, nil + } + return Enzyme{}, errors.New("Enzyme " + name + " not found") } // CutWithEnzyme cuts a given sequence with an enzyme represented by an Enzyme struct. 
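+// A minimal usage sketch (illustrative only; the Part literal is a made-up
+// linear sequence containing a BsaI recognition site, and the enzyme comes
+// from an EnzymeManager built with GetBaseRestrictionEnzymes):
+//
+//	manager := NewEnzymeManager(GetBaseRestrictionEnzymes())
+//	bsaI, _ := manager.GetEnzymeByName("BsaI")
+//	fragments := CutWithEnzyme(Part{"ATAGGTCTCAATGCATCGATCGACTAGCATGAGAG", false}, false, bsaI)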
-func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment {
-	var fragmentSeqs []string
+func CutWithEnzyme(part Part, directional bool, enzyme Enzyme) []Fragment {
+	var fragmentSequences []string
 	var sequence string
-	if seq.Circular {
-		sequence = strings.ToUpper(seq.Sequence + seq.Sequence)
+	if part.Circular {
+		sequence = strings.ToUpper(part.Sequence + part.Sequence)
 	} else {
-		sequence = strings.ToUpper(seq.Sequence)
+		sequence = strings.ToUpper(part.Sequence)
 	}

 	// Check for palindromes
@@ -135,20 +153,20 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment {
 	var reverseOverhangs []Overhang
 	forwardCuts := enzyme.RegexpFor.FindAllStringIndex(sequence, -1)
 	for _, forwardCut := range forwardCuts {
-		forwardOverhangs = append(forwardOverhangs, Overhang{Length: enzyme.OverhangLen, Position: forwardCut[1] + enzyme.Skip, Forward: true, RecognitionSitePlusSkipLength: len(enzyme.RecognitionSite) + enzyme.Skip})
+		forwardOverhangs = append(forwardOverhangs, Overhang{Length: enzyme.OverhangLength, Position: forwardCut[1] + enzyme.Skip, Forward: true, RecognitionSitePlusSkipLength: len(enzyme.RecognitionSite) + enzyme.Skip})
 	}

 	// Palindromic enzymes won't need reverseCuts
 	if !palindromic {
 		reverseCuts := enzyme.RegexpRev.FindAllStringIndex(sequence, -1)
 		for _, reverseCut := range reverseCuts {
-			reverseOverhangs = append(reverseOverhangs, Overhang{Length: enzyme.OverhangLen, Position: reverseCut[0] - enzyme.Skip, Forward: false, RecognitionSitePlusSkipLength: len(enzyme.RecognitionSite) + enzyme.Skip})
+			reverseOverhangs = append(reverseOverhangs, Overhang{Length: enzyme.OverhangLength, Position: reverseCut[0] - enzyme.Skip, Forward: false, RecognitionSitePlusSkipLength: len(enzyme.RecognitionSite) + enzyme.Skip})
 		}
 	}

-	// If, on a linear sequence, the last overhang's position + EnzymeSkip + EnzymeOverhangLen is over the length of the sequence, remove that overhang.
+	// If, on a linear sequence, the last overhang's position + EnzymeSkip + EnzymeOverhangLength is over the length of the sequence, remove that overhang.
	for _, overhangSet := range [][]Overhang{forwardOverhangs, reverseOverhangs} {
 		if len(overhangSet) > 0 {
-			if !seq.Circular && (overhangSet[len(overhangSet)-1].Position+enzyme.Skip+enzyme.OverhangLen > len(sequence)) {
+			if !part.Circular && (overhangSet[len(overhangSet)-1].Position+enzyme.Skip+enzyme.OverhangLength > len(sequence)) {
 				overhangSet = overhangSet[:len(overhangSet)-1]
 			}
 		}
@@ -166,26 +184,26 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment {
 	var nextOverhang Overhang
 	// Linear fragments with 1 cut that are no directional will always give a
 	// 2 fragments
-	if len(overhangs) == 1 && !directional && !seq.Circular { // Check the case of a single cut
+	if len(overhangs) == 1 && !directional && !part.Circular { // Check the case of a single cut
 		// In the case of a single cut in a linear sequence, we get two fragments with only 1 stick end
-		fragmentSeq1 := sequence[overhangs[0].Position+overhangs[0].Length:]
-		fragmentSeq2 := sequence[:overhangs[0].Position]
-		overhangSeq := sequence[overhangs[0].Position : overhangs[0].Position+overhangs[0].Length]
-		fragments = append(fragments, Fragment{fragmentSeq1, overhangSeq, ""})
-		fragments = append(fragments, Fragment{fragmentSeq2, "", overhangSeq})
+		fragmentSequence1 := sequence[overhangs[0].Position+overhangs[0].Length:]
+		fragmentSequence2 := sequence[:overhangs[0].Position]
+		overhangSequence := sequence[overhangs[0].Position : overhangs[0].Position+overhangs[0].Length]
+		fragments = append(fragments, Fragment{fragmentSequence1, overhangSequence, ""})
+		fragments = append(fragments, Fragment{fragmentSequence2, "", overhangSequence})
 		return fragments
 	}

 	// Circular fragments with 1 cut will always have 2 overhangs (because of the
 	// concat earlier). If we don't require directionality, this will always get
 	// cut into a single fragment
-	if len(overhangs) == 2 && !directional && seq.Circular {
+	if len(overhangs) == 2 && !directional && part.Circular {
 		// In the case of a single cut in a circular sequence, we get one fragment out with sticky overhangs
-		fragmentSeq1 := sequence[overhangs[0].Position+overhangs[0].Length : len(seq.Sequence)]
-		fragmentSeq2 := sequence[:overhangs[0].Position]
-		fragmentSeq := fragmentSeq1 + fragmentSeq2
-		overhangSeq := sequence[overhangs[0].Position : overhangs[0].Position+overhangs[0].Length]
-		fragments = append(fragments, Fragment{fragmentSeq, overhangSeq, overhangSeq})
+		fragmentSequence1 := sequence[overhangs[0].Position+overhangs[0].Length : len(part.Sequence)]
+		fragmentSequence2 := sequence[:overhangs[0].Position]
+		fragmentSequence := fragmentSequence1 + fragmentSequence2
+		overhangSequence := sequence[overhangs[0].Position : overhangs[0].Position+overhangs[0].Length]
+		fragments = append(fragments, Fragment{fragmentSequence, overhangSequence, overhangSequence})
 		return fragments
 	}
@@ -205,28 +223,28 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment {
 	// the basis of GoldenGate assembly.
		if directional && !palindromic {
 			if currentOverhang.Forward && !nextOverhang.Forward {
-				fragmentSeqs = append(fragmentSeqs, sequence[currentOverhang.Position:nextOverhang.Position])
+				fragmentSequences = append(fragmentSequences, sequence[currentOverhang.Position:nextOverhang.Position])
 			}
 			// We have to subtract RecognitionSitePlusSkipLength in case we have a recognition site on
 			// one side of the origin of a circular sequence and the cut site on the other side of the origin
-			if nextOverhang.Position-nextOverhang.RecognitionSitePlusSkipLength > len(seq.Sequence) {
+			if nextOverhang.Position-nextOverhang.RecognitionSitePlusSkipLength > len(part.Sequence) {
 				break
 			}
 		} else {
-			fragmentSeqs = append(fragmentSeqs, sequence[currentOverhang.Position:nextOverhang.Position])
-			if nextOverhang.Position-nextOverhang.RecognitionSitePlusSkipLength > len(seq.Sequence) {
+			fragmentSequences = append(fragmentSequences, sequence[currentOverhang.Position:nextOverhang.Position])
+			if nextOverhang.Position-nextOverhang.RecognitionSitePlusSkipLength > len(part.Sequence) {
 				break
 			}
 		}
 	}
 	// Convert fragment sequences into fragments
-	for _, fragment := range fragmentSeqs {
+	for _, fragmentsequence := range fragmentSequences {
 		// Minimum lengths (given oligos) for assembly is 8 base pairs
 		// https://doi.org/10.1186/1756-0500-3-291
-		if len(fragment) > 8 {
-			fragmentSequence := fragment[enzyme.OverhangLen : len(fragment)-enzyme.OverhangLen]
-			forwardOverhang := fragment[:enzyme.OverhangLen]
-			reverseOverhang := fragment[len(fragment)-enzyme.OverhangLen:]
+		if len(fragmentsequence) > 8 {
+			fragmentSequence := fragmentsequence[enzyme.OverhangLength : len(fragmentsequence)-enzyme.OverhangLength]
+			forwardOverhang := fragmentsequence[:enzyme.OverhangLength]
+			reverseOverhang := fragmentsequence[len(fragmentsequence)-enzyme.OverhangLength:]
 			fragments = append(fragments, Fragment{Sequence: fragmentSequence, ForwardOverhang: forwardOverhang, ReverseOverhang: reverseOverhang})
 		}
 	}
@@ -235,94 +253,73 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment {
 	return fragments
 }

-func recurseLigate(wg *sync.WaitGroup, constructs chan string, infiniteLoopingConstructs chan string, seedFragment Fragment, fragmentList []Fragment, usedFragments []Fragment) {
+func recurseLigate(seedFragment Fragment, fragmentList []Fragment, usedFragments []Fragment, existingSeqhashes map[string]struct{}) (openConstructs []string, infiniteConstructs []string) {
 	// Recurse ligate simulates all possible ligations of a series of fragments. Each possible combination begins with a "seed" that fragments from the pool can be added to.
-	defer wg.Done()
 	// If the seed ligates to itself, we can call it done with a successful circularization!
 	if seedFragment.ForwardOverhang == seedFragment.ReverseOverhang {
-		constructs <- seedFragment.ForwardOverhang + seedFragment.Sequence
-	} else {
-		for _, newFragment := range fragmentList {
-			// If the seedFragment's reverse overhang is ligates to a fragment's forward overhang, we can ligate those together and seed another ligation reaction
-			var newSeed Fragment
-			var fragmentAttached bool
-			if seedFragment.ReverseOverhang == newFragment.ForwardOverhang {
-				fragmentAttached = true
-				newSeed = Fragment{seedFragment.Sequence + seedFragment.ReverseOverhang + newFragment.Sequence, seedFragment.ForwardOverhang, newFragment.ReverseOverhang}
-			}
-			// This checks if we can ligate the next fragment in its reverse direction. 
We have to be careful though - if our seed has a palindrome, it will ligate to itself - // like [-> <- -> <- -> ...] infinitely. We check for that case here as well. - if (seedFragment.ReverseOverhang == transform.ReverseComplement(newFragment.ReverseOverhang)) && (seedFragment.ReverseOverhang != transform.ReverseComplement(seedFragment.ReverseOverhang)) { // If the second statement isn't there, program will crash on palindromes - fragmentAttached = true - newSeed = Fragment{seedFragment.Sequence + seedFragment.ReverseOverhang + transform.ReverseComplement(newFragment.Sequence), seedFragment.ForwardOverhang, transform.ReverseComplement(newFragment.ForwardOverhang)} - } - - // If fragment is actually attached, move to some checks - if fragmentAttached { - // If the newFragment's reverse complement already exists in the used fragment list, we need to cancel the recursion. - for _, usedFragment := range usedFragments { - if usedFragment.Sequence == newFragment.Sequence { - infiniteLoopingConstructs <- usedFragment.ForwardOverhang + usedFragment.Sequence + usedFragment.ReverseOverhang - return - } - } - wg.Add(1) - // If everything is clear, append fragment to usedFragments and recurse. - usedFragments = append(usedFragments, newFragment) - go recurseLigate(wg, constructs, infiniteLoopingConstructs, newSeed, fragmentList, usedFragments) - } + construct := seedFragment.ForwardOverhang + seedFragment.Sequence + seqhash, _ := seqhash.Hash(construct, "DNA", true, true) + if _, ok := existingSeqhashes[seqhash]; ok { + return nil, nil } + existingSeqhashes[seqhash] = struct{}{} + return []string{construct}, nil } -} -func getConstructs(c chan string, constructSequences chan []string, circular bool) { - var constructs []string - var exists bool - var existingSeqhashes []string - for { - construct, more := <-c - if more { - exists = false - seqhashConstruct, _ := seqhash.Hash(construct, "DNA", circular, true) - // Check if this construct is unique - for _, existingSeqhash := range existingSeqhashes { - if existingSeqhash == seqhashConstruct { - exists = true + // If the seed ligates to another fragment, we can recurse and add that fragment to the seed + for _, newFragment := range fragmentList { + // If the seedFragment's reverse overhang is ligates to a fragment's forward overhang, we can ligate those together and seed another ligation reaction + var newSeed Fragment + var fragmentAttached bool + if seedFragment.ReverseOverhang == newFragment.ForwardOverhang { + fragmentAttached = true + newSeed = Fragment{seedFragment.Sequence + seedFragment.ReverseOverhang + newFragment.Sequence, seedFragment.ForwardOverhang, newFragment.ReverseOverhang} + } + // This checks if we can ligate the next fragment in its reverse direction. We have to be careful though - if our seed has a palindrome, it will ligate to itself + // like [-> <- -> <- -> ...] infinitely. We check for that case here as well. 
+ if (seedFragment.ReverseOverhang == transform.ReverseComplement(newFragment.ReverseOverhang)) && (seedFragment.ReverseOverhang != transform.ReverseComplement(seedFragment.ReverseOverhang)) { // If the second statement isn't there, program will crash on palindromes + fragmentAttached = true + newSeed = Fragment{seedFragment.Sequence + seedFragment.ReverseOverhang + transform.ReverseComplement(newFragment.Sequence), seedFragment.ForwardOverhang, transform.ReverseComplement(newFragment.ForwardOverhang)} + } + + // If fragment is actually attached, move to some checks + if fragmentAttached { + // If the newFragment's reverse complement already exists in the used fragment list, we need to cancel the recursion. + for _, usedFragment := range usedFragments { + if usedFragment.Sequence == newFragment.Sequence { + infiniteConstruct := usedFragment.ForwardOverhang + usedFragment.Sequence + usedFragment.ReverseOverhang + seqhash, _ := seqhash.Hash(infiniteConstruct, "DNA", false, true) + if _, ok := existingSeqhashes[seqhash]; ok { + return nil, nil + } + existingSeqhashes[seqhash] = struct{}{} + return nil, []string{infiniteConstruct} } } - if !exists { - constructs = append(constructs, construct) - existingSeqhashes = append(existingSeqhashes, seqhashConstruct) - } - } else { - constructSequences <- constructs - close(constructSequences) - return + // If everything is clear, append fragment to usedFragments and recurse. + usedFragments = append(usedFragments, newFragment) + openconstructs, infiniteconstructs := recurseLigate(newSeed, fragmentList, usedFragments, existingSeqhashes) + + openConstructs = append(openConstructs, openconstructs...) + infiniteConstructs = append(infiniteConstructs, infiniteconstructs...) } } + + return openConstructs, infiniteConstructs } // CircularLigate simulates ligation of all possible fragment combinations into circular plasmids. -func CircularLigate(fragments []Fragment) ([]string, []string, error) { - var wg sync.WaitGroup +func CircularLigate(fragments []Fragment) ([]string, []string) { var outputConstructs []string var outputInfiniteLoopingConstructs []string - constructs := make(chan string) - infiniteLoopingConstructs := make(chan string) // sometimes we will get stuck in infinite loops. These are sequences with a recursion break - constructSequences := make(chan []string) - infiniteLoopingConstructSequences := make(chan []string) + existingSeqhashes := make(map[string]struct{}) for _, fragment := range fragments { - wg.Add(1) - go recurseLigate(&wg, constructs, infiniteLoopingConstructs, fragment, fragments, []Fragment{}) + openConstructs, infiniteConstructs := recurseLigate(fragment, fragments, []Fragment{}, existingSeqhashes) + + outputConstructs = append(outputConstructs, openConstructs...) + outputInfiniteLoopingConstructs = append(outputInfiniteLoopingConstructs, infiniteConstructs...) } - go getConstructs(constructs, constructSequences, true) - go getConstructs(infiniteLoopingConstructs, infiniteLoopingConstructSequences, false) - wg.Wait() - close(constructs) - close(infiniteLoopingConstructs) - outputConstructs = <-constructSequences - outputInfiniteLoopingConstructs = <-infiniteLoopingConstructSequences - return outputConstructs, outputInfiniteLoopingConstructs, nil + return outputConstructs, outputInfiniteLoopingConstructs } /****************************************************************************** @@ -333,14 +330,21 @@ Specific cloning functions begin here. // GoldenGate simulates a GoldenGate cloning reaction. 
As of right now, we only // support BsaI, BbsI, BtgZI, and BsmBI. -func GoldenGate(sequences []Part, enzymeStr string) ([]string, []string, error) { +func GoldenGate(sequences []Part, cuttingEnzyme Enzyme) (openConstructs []string, infiniteLoops []string) { var fragments []Fragment for _, sequence := range sequences { - newFragments, err := CutWithEnzymeByName(sequence, true, enzymeStr) - if err != nil { - return []string{}, []string{}, err - } + newFragments := CutWithEnzyme(sequence, true, cuttingEnzyme) fragments = append(fragments, newFragments...) } - return CircularLigate(fragments) + openconstructs, infiniteloops := CircularLigate(fragments) + return openconstructs, infiniteloops +} + +// GetBaseRestrictionEnzymes return a basic slice of common enzymes used in Golden Gate Assembly. Eventually, we want to get the data for this map from ftp://ftp.neb.com/pub/rebase +func GetBaseRestrictionEnzymes() []Enzyme { + return []Enzyme{ + {"BsaI", regexp.MustCompile("GGTCTC"), regexp.MustCompile("GAGACC"), 1, 4, "GGTCTC"}, + {"BbsI", regexp.MustCompile("GAAGAC"), regexp.MustCompile("GTCTTC"), 2, 4, "GAAGAC"}, + {"BtgZI", regexp.MustCompile("GCGATG"), regexp.MustCompile("CATCGC"), 10, 4, "GCGATG"}, + } } diff --git a/clone/clone_test.go b/clone/clone_test.go index 2cfb57615..fb5ade198 100644 --- a/clone/clone_test.go +++ b/clone/clone_test.go @@ -1,55 +1,53 @@ -package clone_test +package clone import ( - "fmt" "testing" - - "github.com/TimothyStiles/poly/clone" - "github.com/TimothyStiles/poly/seqhash" ) // pOpen plasmid series (https://stanford.freegenes.org/collections/open-genes/products/open-plasmids#description). I use it for essentially all my cloning. -Keoni -var popen = clone.Part{"TAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGTAgtcttcGCcatcgCtACTAAAagccagataacagtatgcgtatttgcgcgctgatttttgcggtataagaatatatactgatatgtatacccgaagtatgtcaaaaagaggtatgctatgaagcagcgtattacagtgacagttgacagcgacagctatcagttgctcaaggcatatatgatgtcaatatctccggtctggtaagcacaaccatgcagaatgaagcccgtcgtctgcgtgccgaacgctggaaagcggaaaatcaggaagggatggctgaggtcgcccggtttattgaaatgaacg
gctcttttgctgacgagaacagggGCTGGTGAAATGCAGTTTAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACGCCCGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGCTGTCAGATAAAGTCTCCCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGGCCAGTGTGCCGGTCTCCGTTATCGGGGAAGAAGTGGCTGATCTCAGCCACCGCGAAAATGACATCAAAAACGCCATTAACCTGATGTTCTGGGGAATATAAATGTCAGGCTCCCTTATACACAGgcgatgttgaagaccaCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGG", true} +var popen = Part{"TAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGTAgtcttcGCcatcgCtACTAAAagccagataacagtatgcgtatttgcgcgctgatttttgcggtataagaatatatactgatatgtatacccgaagtatgtcaaaaagaggtatgctatgaagcagcgtattacagtgacagttgacagcgacagctatcagttgctcaaggcatatatgatgtcaatatctccggtctggtaagcacaaccatgcagaatgaagcccgtcgtctgcgtgccgaacgctggaaagcggaaaatcaggaagggatggctgaggtcgcccggtttattgaaatgaacggctcttttgctgacgagaacagggGCTGGTGAAATGCAGTTTAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACGCCCGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGCTGTCAGATAAAGTCTCCCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGGCCAGTGTGCCGGTCTCCGTTATCGGGGAAGAAGTGGCTGATCTCAGCCACCGCGAAAATGACATCAAAAACGCCATTAACCTGATGTTCTGGGGAATATAAATGTCAGGCTCCCTTATACACAGgcgatgttgaagaccaCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCT
TATCCGG", true} func TestCutWithEnzymeByName(t *testing.T) { - _, err := clone.CutWithEnzymeByName(popen, true, "EcoFake") + enzymeManager := NewEnzymeManager(GetBaseRestrictionEnzymes()) + _, err := enzymeManager.CutWithEnzymeByName(popen, true, "EcoFake") if err == nil { t.Errorf("CutWithEnzymeByName should have failed when looking for fake restriction enzyme EcoFake") } } func TestCutWithEnzyme(t *testing.T) { - var seq clone.Part + enzymeManager := NewEnzymeManager(GetBaseRestrictionEnzymes()) + var sequence Part bsai := "GGTCTCAATGC" bsaiComplement := "ATGCAGAGACC" // test(1) // Test case of `<-bsaiComplement bsai-> <-bsaiComplement bsai->` where bsaI cuts off of a linear sequence. This tests the line: - // if !seq.Circular && (overhangSet[len(overhangSet)-1].Position+enzyme.EnzymeSkip+enzyme.EnzymeOverhangLen > len(sequence)) - seq = clone.Part{"ATATATA" + bsaiComplement + bsai + "ATGCATCGATCGACTAGCATG" + bsaiComplement + bsai[:8], false} - frag, err := clone.CutWithEnzymeByName(seq, true, "BsaI") + // if !sequence.Circular && (overhangSet[len(overhangSet)-1].Position+enzyme.EnzymeSkip+enzyme.EnzymeOverhangLen > len(sequence)) + sequence = Part{"ATATATA" + bsaiComplement + bsai + "ATGCATCGATCGACTAGCATG" + bsaiComplement + bsai[:8], false} + fragment, err := enzymeManager.CutWithEnzymeByName(sequence, true, "BsaI") if err != nil { t.Errorf("CutWithEnzyme should not have failed on test(1). Got error: %s", err) } - if len(frag) != 1 { + if len(fragment) != 1 { t.Errorf("CutWithEnzyme in test(1) should be 1 fragment in length") } - if frag[0].Sequence != "ATGCATCGATCGACTAGCATG" { - t.Errorf("CutWithEnzyme in test(1) should give fragment with sequence ATGCATCGATCGACTAGCATG . Got sequence: %s", frag[0].Sequence) + if fragment[0].Sequence != "ATGCATCGATCGACTAGCATG" { + t.Errorf("CutWithEnzyme in test(1) should give fragment with sequence ATGCATCGATCGACTAGCATG . Got sequence: %s", fragment[0].Sequence) } // test(2) // Now if we take the same sequence and circularize it, we get a different result - seq.Circular = true - frag, err = clone.CutWithEnzymeByName(seq, true, "BsaI") + sequence.Circular = true + fragment, err = enzymeManager.CutWithEnzymeByName(sequence, true, "BsaI") if err != nil { t.Errorf("CutWithEnzyme should not have failed on test(2). Got error: %s", err) } - if len(frag) != 2 { + if len(fragment) != 2 { t.Errorf("CutWithEnzyme in test(2) should be 1 fragment in length") } - if frag[0].Sequence != "ATGCATCGATCGACTAGCATG" || frag[1].Sequence != "TATA" { - t.Errorf("CutWithEnzyme in test(2) should give fragment with sequence ATGCATCGATCGACTAGCATG and TATA. Got sequence: %s and %s", frag[0].Sequence, frag[1].Sequence) + if fragment[0].Sequence != "ATGCATCGATCGACTAGCATG" || fragment[1].Sequence != "TATA" { + t.Errorf("CutWithEnzyme in test(2) should give fragment with sequence ATGCATCGATCGACTAGCATG and TATA. Got sequence: %s and %s", fragment[0].Sequence, fragment[1].Sequence) } // test(3) @@ -57,43 +55,43 @@ func TestCutWithEnzyme(t *testing.T) { // different results if we have a linear or circular DNA. Since single cuts // will give no fragments if you test for directionality, we set the // directionality flag to false. 
This tests the line: - // if len(overhangs) == 1 && !directional && !seq.Circular - seq = clone.Part{"ATATATATATATATAT" + bsai + "GCGCGCGCGCGCGCGCGCGC", false} - frag, err = clone.CutWithEnzymeByName(seq, false, "BsaI") + // if len(overhangs) == 1 && !directional && !sequence.Circular + sequence = Part{"ATATATATATATATAT" + bsai + "GCGCGCGCGCGCGCGCGCGC", false} + fragment, err = enzymeManager.CutWithEnzymeByName(sequence, false, "BsaI") if err != nil { t.Errorf("CutWithEnzyme should not have failed on test(3). Got error: %s", err) } - if len(frag) != 2 { + if len(fragment) != 2 { t.Errorf("Cutting a linear fragment with a single cut site should give 2 fragments") } - if frag[0].Sequence != "GCGCGCGCGCGCGCGCGCGC" || frag[1].Sequence != "ATATATATATATATATGGTCTCA" { - t.Errorf("CutWithEnzyme in test(3) should give fragment with sequence GCGCGCGCGCGCGCGCGCGC and ATATATATATATATATGGTCTCA. Got sequence: %s and %s", frag[0].Sequence, frag[1].Sequence) + if fragment[0].Sequence != "GCGCGCGCGCGCGCGCGCGC" || fragment[1].Sequence != "ATATATATATATATATGGTCTCA" { + t.Errorf("CutWithEnzyme in test(3) should give fragment with sequence GCGCGCGCGCGCGCGCGCGC and ATATATATATATATATGGTCTCA. Got sequence: %s and %s", fragment[0].Sequence, fragment[1].Sequence) } // test(4) // This tests for the above except with a circular fragment. Specifically, it // tests the line: - // if len(overhangs) == 2 && !directional && seq.Circular - seq.Circular = true - frag, err = clone.CutWithEnzymeByName(seq, false, "BsaI") + // if len(overhangs) == 2 && !directional && sequence.Circular + sequence.Circular = true + fragment, err = enzymeManager.CutWithEnzymeByName(sequence, false, "BsaI") if err != nil { t.Errorf("CutWithEnzyme should not have failed on test(4). Got error: %s", err) } - if len(frag) != 1 { + if len(fragment) != 1 { t.Errorf("Cutting a circular fragment with a single cut site should give 1 fragments") } - if frag[0].Sequence != "GCGCGCGCGCGCGCGCGCGCATATATATATATATATGGTCTCA" { - t.Errorf("CutWithEnzyme in test(4) should give fragment with sequence ATATATATATATATATGGTCTCA. Got Sequence: %s", frag[0].Sequence) + if fragment[0].Sequence != "GCGCGCGCGCGCGCGCGCGCATATATATATATATATGGTCTCA" { + t.Errorf("CutWithEnzyme in test(4) should give fragment with sequence ATATATATATATATATGGTCTCA. Got Sequence: %s", fragment[0].Sequence) } // test(5) // This tests if we have a fragment where we do not care about directionality // but have more than 1 cut site in our fragment. We can use pOpen for this. - frag, err = clone.CutWithEnzymeByName(popen, false, "BbsI") + fragment, err = enzymeManager.CutWithEnzymeByName(popen, false, "BbsI") if err != nil { t.Errorf("CutWithEnzyme should not have failed on test(5). Got error: %s", err) } - if len(frag) != 2 { + if len(fragment) != 2 { t.Errorf("Cutting pOpen without a direction should yield 2 fragments") } } @@ -101,12 +99,9 @@ func TestCutWithEnzyme(t *testing.T) { func TestCircularLigate(t *testing.T) { // The following tests for complementing overhangs. 
Specific, this line: // newSeed := Fragment{seedFragment.Sequence + seedFragment.ReverseOverhang + ReverseComplement(newFragment.Sequence), seedFragment.ForwardOverhang, ReverseComplement(newFragment.ForwardOverhang)} - fragment1 := clone.Fragment{"AAAAAA", "GTTG", "CTAT"} - fragment2 := clone.Fragment{"AAAAAA", "CAAC", "ATAG"} - outputConstructs, infiniteLoops, err := clone.CircularLigate([]clone.Fragment{fragment1, fragment2}) - if err != nil { - t.Errorf("Failed circular ligation with error: %s", err) - } + fragment1 := Fragment{"AAAAAA", "GTTG", "CTAT"} + fragment2 := Fragment{"AAAAAA", "CAAC", "ATAG"} + outputConstructs, infiniteLoops := CircularLigate([]Fragment{fragment1, fragment2}) if len(outputConstructs) != 1 { t.Errorf("Circular ligation with complementing overhangs should only output 1 valid rotated sequence.") } @@ -115,53 +110,39 @@ func TestCircularLigate(t *testing.T) { } } -func TestGoldenGate(t *testing.T) { - // Here we test if the enzyme we want to use in a GoldenGate reaction does not exist in our enzyme pool - fragment1 := clone.Part{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} - fragment2 := clone.Part{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} - - _, _, err := clone.GoldenGate([]clone.Part{fragment1, fragment2, popen}, "EcoRFake") +func TestEnzymeManage_GetEnzymeByName_NotFound(t *testing.T) { + enzymeManager := NewEnzymeManager(GetBaseRestrictionEnzymes()) + _, err := enzymeManager.GetEnzymeByName("EcoRFake") if err == nil { t.Errorf("GoldenGate should fail when using enzyme EcoRFake") } - if err.Error() != "Enzyme EcoRFake not found in enzymeMap" { + if err.Error() != "Enzyme EcoRFake not found" { t.Errorf("Failure of GoldenGate on incorrect enzyme should follow the exact string `Enzyme EcoRFake not found in enzymeMap`. 
Got: %s", err.Error()) } } -func ExampleGoldenGate() { - // Fragment 1 has a palindrome at its start. This isn't very common but - // can occur. These two fragments are real DNA fragments used in the - // FreeGenes Project. They are used because they were on my computer - // - Keoni - fragment1 := clone.Part{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} - fragment2 := clone.Part{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} - - Clones, _, _ := clone.GoldenGate([]clone.Part{fragment1, fragment2, popen}, "BbsI") - - fmt.Println(seqhash.RotateSequence(Clones[0])) - // Output: 
AAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTG
CTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAG -} - func TestSignalKilledGoldenGate(t *testing.T) { + enzymeManager := NewEnzymeManager(GetBaseRestrictionEnzymes()) // This previously would crash from using too much RAM. - frag1 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATGGAGGGTCTCAAGGTGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTTTTGCCCTGTAAACGAAAAAACCACCTGGGTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - frag2 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATTGGGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - frag3 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - frag4 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - frag5 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTGGAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGAGCTATGAGAAAGCGTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - frag6 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - frag7 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATCCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - frag8 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - frag9 := clone.Part{"AAAGCACTCTTAGGCCTCTGGAAGACATATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCAAAAAGAGACCCGCTTAGTCTTCGCATTTCTTAATCGGTGCCC", false} - fragments := []clone.Part{popen, frag1, frag2, frag3, frag4, frag5, frag6, frag7, frag8, frag9} - - clones, loopingClones, err := clone.GoldenGate(fragments, "BbsI") + fragment1 := Part{"AAAGCACTCTTAGGCCTCTGGAAGACATGGAGGGTCTCAAGGTGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTTTTGCCCTGTAAACGAAAAAACCACCTGGGTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragment2 := Part{"AAAGCACTCTTAGGCCTCTGGAAGACATTGGGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragment3 := Part{"AAAGCACTCTTAGGCCTCTGGAAGACATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragment4 := Part{"AAAGCACTCTTAGGCCTCTGGAAGACATTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragment5 := Part{"AAAGCACTCTTAGGCCTCTGGAAGACATGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTGGAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGAGCTATGAGAAAGCGTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragment6 := Part{"AAAGCACTCTTAGGCCTCTGGAAGACATAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragment7 := Part{"AAAGCACTCTTAGGCCTCTGGAAGACATCCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragment8 := Part{"AAAGCACTCTTAGGCCTCTGGAAGACATGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragment9 := 
Part{"AAAGCACTCTTAGGCCTCTGGAAGACATATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCAAAAAGAGACCCGCTTAGTCTTCGCATTTCTTAATCGGTGCCC", false} + fragments := []Part{popen, fragment1, fragment2, fragment3, fragment4, fragment5, fragment6, fragment7, fragment8, fragment9} + + bbsI, err := enzymeManager.GetEnzymeByName("BbsI") if err != nil { - t.Errorf("GoldenGate should not fail with these fragments. Got error: %s", err) + t.Errorf("Error when getting Enzyme. Got error: %s", err) } + + clones, loopingClones := GoldenGate(fragments, bbsI) if len(clones) != 1 { - t.Errorf("There should be 1 output clone. Got: %d", len(clones)) + t.Errorf("There should be 1 output Got: %d", len(clones)) } // This should be changed later when we have a better way of informing user of reused overhangs if len(loopingClones) != 4 { @@ -170,27 +151,31 @@ func TestSignalKilledGoldenGate(t *testing.T) { } func TestPanicGoldenGate(t *testing.T) { + enzymeManager := NewEnzymeManager(GetBaseRestrictionEnzymes()) // This used to panic with the message: // panic: runtime error: slice bounds out of range [:-2] [recovered] // It was from the following sequence: GAAGACATAATGGTCTTC . There are 2 intercepting BbsI sites. - frag1 := clone.Part{"AAACCGGAGCCATACAGTACGAAGACATGGAGGGTCTCAAATGAAAAAAATCATCGAAACCCAGCGTGCACCGGGAGCAATCGGACCGTACGTCCAGGGAGTCGACCTAGGATCAATGTAGTCTTCGCACTTGGCTTAGATGCAAC", false} - frag2 := clone.Part{"AAACCGGAGCCATACAGTACGAAGACATAATGGTCTTCACCTCAGGACAGATCCCGGTCTGCCCGCAGACCGGAGAAATCCCGGCAGACGTCCAGGACCAGGCACGTCTATCACTAGATAGTCTTCGCACTTGGCTTAGATGCAAC", false} - frag3 := clone.Part{"AAACCGGAGCCATACAGTACGAAGACATTAGAAAACGTCAAAGCAATCGTCGTCGCAGCAGGACTATCAGTCGGAGACATCATCAAAATGACCGTCTTCATCACCGACCTAAACGACTTAGTCTTCGCACTTGGCTTAGATGCAAC", false} - frag4 := clone.Part{"AAACCGGAGCCATACAGTACGAAGACATGACTTCGCAACCATCAACGAAGTCTACAAACAGTTCTTCGACGAACACCAGGCAACCTACCCGACCCGTTCATGCGTCCAGGTCGCACGTCTACTAGTCTTCGCACTTGGCTTAGATGCAAC", false} - frag5 := clone.Part{"AAACCGGAGCCATACAGTACGAAGACATCTACCGAAAGACGTCAAACTAGAAATCGAAGCAATCGCAGTCCGTTCAGCAAGAGCTTAGAGACCCGCTTAGTCTTCGCACTTGGCTTAGATGCAAC", false} - fragments := []clone.Part{popen, frag1, frag2, frag3, frag4, frag5} - - _, _, err := clone.GoldenGate(fragments, "BbsI") + fragment1 := Part{"AAACCGGAGCCATACAGTACGAAGACATGGAGGGTCTCAAATGAAAAAAATCATCGAAACCCAGCGTGCACCGGGAGCAATCGGACCGTACGTCCAGGGAGTCGACCTAGGATCAATGTAGTCTTCGCACTTGGCTTAGATGCAAC", false} + fragment2 := Part{"AAACCGGAGCCATACAGTACGAAGACATAATGGTCTTCACCTCAGGACAGATCCCGGTCTGCCCGCAGACCGGAGAAATCCCGGCAGACGTCCAGGACCAGGCACGTCTATCACTAGATAGTCTTCGCACTTGGCTTAGATGCAAC", false} + fragment3 := Part{"AAACCGGAGCCATACAGTACGAAGACATTAGAAAACGTCAAAGCAATCGTCGTCGCAGCAGGACTATCAGTCGGAGACATCATCAAAATGACCGTCTTCATCACCGACCTAAACGACTTAGTCTTCGCACTTGGCTTAGATGCAAC", false} + fragment4 := Part{"AAACCGGAGCCATACAGTACGAAGACATGACTTCGCAACCATCAACGAAGTCTACAAACAGTTCTTCGACGAACACCAGGCAACCTACCCGACCCGTTCATGCGTCCAGGTCGCACGTCTACTAGTCTTCGCACTTGGCTTAGATGCAAC", false} + fragment5 := Part{"AAACCGGAGCCATACAGTACGAAGACATCTACCGAAAGACGTCAAACTAGAAATCGAAGCAATCGCAGTCCGTTCAGCAAGAGCTTAGAGACCCGCTTAGTCTTCGCACTTGGCTTAGATGCAAC", false} + fragments := []Part{popen, fragment1, fragment2, fragment3, fragment4, fragment5} + + bbsI, err := enzymeManager.GetEnzymeByName("BbsI") if err != nil { - t.Errorf("GoldenGate should not fail with these fragments. Got error: %s", err) + t.Errorf("Error when getting Enzyme. 
Got error: %s", err) } + + _, _ = GoldenGate(fragments, bbsI) } func TestCircularCutRegression(t *testing.T) { + enzymeManager := NewEnzymeManager(GetBaseRestrictionEnzymes()) // This used to error with 0 fragments since the BsaI cut site is on the other // side of the origin from its recognition site. - plasmid1 := clone.Part{"AAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCCGAGaccaagtcgcggccgcgaggtgtcaatcgtcggagtagggataacagggtaatccgctgagcaataactagcataaccccttggggcctctaaacgggtcttgaggggttttttgcatggtcatagctgtttcctgttacgccccgccctgccactcgtcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgacacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagggatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccatccttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcatttccataggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaaggacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagtaaaacgacggccagtagtcaaaagcctccgaccggaggcttttgacttggttcaggtggagtggcggccgcgacttgGTCTC", true} - newFragments, err := clone.CutWithEnzymeByName(plasmid1, true, "BsaI") + plasmid1 := 
Part{"AAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCCGAGaccaagtcgcggccgcgaggtgtcaatcgtcggagtagggataacagggtaatccgctgagcaataactagcataaccccttggggcctctaaacgggtcttgaggggttttttgcatggtcatagctgtttcctgttacgccccgccctgccactcgtcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgacacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagggatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccatccttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgccgatcatttccataggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaaggacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagtaaaacgacggccagtagtcaaaagcctccgaccggaggcttttgacttggttcaggtggagtggcggccgcgacttgGTCTC", true} + newFragments, err := enzymeManager.CutWithEnzymeByName(plasmid1, true, "BsaI") if err != nil { t.Errorf("Failed to cut: %s", err) } @@ -198,3 +183,21 @@ func TestCircularCutRegression(t *testing.T) { t.Errorf("Expected 1 new fragment, got: %d", len(newFragments)) } } + +func benchmarkGoldenGate(b *testing.B, enzymeManager EnzymeManager, parts []Part) { + bbsI, err := enzymeManager.GetEnzymeByName("BbsI") + if err != nil { + b.Errorf("Error when getting Enzyme. 
Got error: %s", err) + } + for n := 0; n < b.N; n++ { + _, _ = GoldenGate(parts, bbsI) + } +} + +func BenchmarkGoldenGate3Parts(b *testing.B) { + enzymeManager := NewEnzymeManager(GetBaseRestrictionEnzymes()) + fragment1 := Part{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} + fragment2 := Part{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} + + benchmarkGoldenGate(b, enzymeManager, []Part{fragment1, fragment2, popen}) +} diff --git a/clone/example_test.go b/clone/example_test.go new file mode 100644 index 000000000..85d12dc60 --- /dev/null +++ b/clone/example_test.go @@ -0,0 +1,31 @@ +package clone_test + +import ( + "fmt" + "log" + + "github.com/TimothyStiles/poly/clone" + "github.com/TimothyStiles/poly/seqhash" +) + +func ExampleGoldenGate() { + enzymeManager := clone.NewEnzymeManager(clone.GetBaseRestrictionEnzymes()) + // Fragment 1 has a palindrome at its start. This isn't very common but + // can occur. These two fragments are real DNA fragments used in the + // FreeGenes Project. 
They are used because they were on my computer + // - Keoni + fragment1 := clone.Part{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} + fragment2 := clone.Part{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} + + // pOpen plasmid series (https://stanford.freegenes.org/collections/open-genes/products/open-plasmids#description). I use it for essentially all my cloning. 
-Keoni + var popen = clone.Part{"TAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGTAgtcttcGCcatcgCtACTAAAagccagataacagtatgcgtatttgcgcgctgatttttgcggtataagaatatatactgatatgtatacccgaagtatgtcaaaaagaggtatgctatgaagcagcgtattacagtgacagttgacagcgacagctatcagttgctcaaggcatatatgatgtcaatatctccggtctggtaagcacaaccatgcagaatgaagcccgtcgtctgcgtgccgaacgctggaaagcggaaaatcaggaagggatggctgaggtcgcccggtttattgaaatgaacggctcttttgctgacgagaacagggGCTGGTGAAATGCAGTTTAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACGCCCGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGCTGTCAGATAAAGTCTCCCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGGCCAGTGTGCCGGTCTCCGTTATCGGGGAAGAAGTGGCTGATCTCAGCCACCGCGAAAATGACATCAAAAACGCCATTAACCTGATGTTCTGGGGAATATAAATGTCAGGCTCCCTTATACACAGgcgatgttgaagaccaCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGG", true} + + bbsI, err := enzymeManager.GetEnzymeByName("BbsI") + if err != nil { + log.Fatalf("Something went wrong when trying to get the enzyme. 
Got error: %s", err) + } + Clones, _ := clone.GoldenGate([]clone.Part{fragment1, fragment2, popen}, bbsI) + + fmt.Println(seqhash.RotateSequence(Clones[0])) + // Output: AAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCG
GTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAG +} From d87246c3d42d92b5554237abdc174fcb158851d1 Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 14 Nov 2023 21:11:51 -0800 Subject: [PATCH 2/4] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b704eea2d..97abd1a36 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Poly is a Go package for engineering organisms. * **[Library](https://pkg.go.dev/github.com/TimothyStiles/poly#pkg-examples)** -* **[Tutorials](https://github.com/TimothyStiles/poly/tree/main/tutorials): ([live](https://gitpod.io/#tutorial=true/https://github.com/TimothyStiles/poly) | [github](https://github.com/TimothyStiles/poly/tree/main/tutorials))** +* **[Tutorials](https://github.com/TimothyStiles/poly/tree/main/tutorials)** * **[Learning Synbio](https://github.com/TimothyStiles/how-to-synbio)** From 96b5cff167667ceb1f1eadb48b301ca189243706 Mon Sep 17 00:00:00 2001 From: Maximilien Rothier Bautzer Date: Wed, 15 Nov 2023 05:23:14 +0000 Subject: [PATCH 3/4] Add stats to codon tables (#350) * add stats to codon tables * separate stats into its own codon analyzer struct * stats struct * turn codonTable into TranslationTable, which holds the required data for translation and optimisation * generate translation and optimisation maps in constructors * update the Compromise and Add funcs to copy the first table when merging (no risk of changing data through pointer) * add gene count to stats * DRY tests with new structures * DRY examples with new structures * update synthesis code to use the codontable's Optimize func * update amino acid weights through setter * bubble up table weight update errs * make stats part of the table struct so we can track them across a table's lifetime, which is probably closer to what we want * make translation table stats public rather than use a getter * move stats below other interfaces * add translate to table interface * update changelog * add weight update err test * newAminoAcidChoosers test * UpdateWeights test * Compromise/Add codon table tests * add an example for using manual codon weights --------- Co-authored-by: Willow Carretero Chavez --- CHANGELOG.md | 1 + synthesis/codon/codon.go | 403 ++++++++++++++++---------- synthesis/codon/codon_test.go | 496 +++++++++++++++++++++----------- synthesis/codon/example_test.go | 187 ++++++------ synthesis/fix/synthesis.go | 4 +- synthesis/fix/synthesis_test.go | 4 +- 6 files changed, 664 insertions(+), 431 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc655a687..7b564df7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Alternative start codons can now be used in the `synthesis/codon` DNA -> protein translation package (#305) - Added a parser and writer for the `pileup` sequence alignment format (#329) +- Added statistics to the `synthesis/codon` package (keeping track of the observed start codon occurrences in a translation table) (#350) ### Fixed - `fastq` parser no longer becomes de-aligned when reading (#325) diff --git a/synthesis/codon/codon.go b/synthesis/codon/codon.go index 9a44b69fd..5fd51de11 100644 --- a/synthesis/codon/codon.go +++ b/synthesis/codon/codon.go @@ -26,37 
+26,35 @@ import ( "strings" "time" + "github.com/TimothyStiles/poly/io/genbank" weightedRand "github.com/mroth/weightedrand" ) /****************************************************************************** -Oct, 15, 2020 - File is structured as so: - Interfaces: - Table - specifies the functions that all table types must implement + Interfaces: + Table - An interface encompassing what a potentially codon optimized Translation table can do + + Structs: + TranslationTable - contains a weighted codon table, which is used when translating and optimizing sequences. The weights can be updated through the codon frequencies we observe in given DNA sequences. - Structs: - codonTable - holds all information mapping codons <-> amino acids during transformations. AminoAcid - holds amino acid related info for codonTable struct - Codon - holds codon related info for AminoAcid struct - Big functions that everything else is related to: + Codon - holds codon related info for AminoAcid struct - Translate - given a nucleic sequence string and codon table it translates sequences - to UPPERCASE amino acid sequences. + Key functions: + TranslationTable.Translate - given a nucleic sequence string and codon table it translates sequences to UPPERCASE amino acid sequences. - Optimize - given an amino acid sequence string and codon table it translates - sequences to UPPERCASE nucleic acid sequences. + TranslationTable.OptimizeSequence - will return a set of codons which can be used to encode the given amino acid sequence. The codons picked are weighted according to the computed translation table's weights -Anywho, most of this file and codonTable's struct methods are meant to help overcome -this codon bias. There's a default codonTable generator near the bottom of this file -with a whole section on how it works and why it's gotta be that way. + TranslationTable.UpdateWeightsWithSequence - will look at the coding regions in the given genbank data, and use those to generate new weights for the codons in the translation table. The next time a sequence is optimised, it will use those updated weights. + + TranslationTable.Stats - a set of statistics we maintain throughout the translation table's lifetime. For example we track the start codons observed when we update the codon table's weights with other DNA sequences ******************************************************************************/ var ( - errEmptyCodonTable = errors.New("empty codon table") + errNoCodingRegions = errors.New("no coding regions found") errEmptyAminoAcidString = errors.New("empty amino acid string") errEmptySequenceString = errors.New("empty sequence string") newChooserFn = weightedRand.NewChooser @@ -83,74 +81,69 @@ type AminoAcid struct { Codons []Codon `json:"codons"` } -// Table is an interface that specifies the functions that all table types must implement +// Table is an interface encompassing what a potentially codon optimized Translation table can do type Table interface { - Chooser() (map[string]weightedRand.Chooser, error) - GenerateTranslationTable() map[string]string - GenerateStartCodonTable() map[string]string - GetAminoAcids() []AminoAcid - GetStartCodons() []string - GetStopCodons() []string - IsEmpty() bool - OptimizeTable(string) Table + GetWeightedAminoAcids() []AminoAcid + OptimizeSequence(aminoAcids string, randomState ...int) (string, error) + Translate(dnaSeq string) (string, error) +} + +// Stats denotes a set of statistics we maintain throughout the translation table's lifetime. 
For example we track +// the start codons observed when we update the codon table's weights with other DNA sequences +type Stats struct { + StartCodonCount map[string]int + GeneCount int } -// codonTable holds information for a codon table. -type codonTable struct { +// NewStats returns a new instance of codon statistics (a set of statistics we maintain throughout a translation table's lifetime) +func NewStats() *Stats { + return &Stats{ + StartCodonCount: map[string]int{}, + } +} + +// TranslationTable contains a weighted codon table, which is used when translating and optimizing sequences. The +// weights can be updated through the codon frequencies we observe in given DNA sequences. +type TranslationTable struct { StartCodons []string `json:"start_codons"` StopCodons []string `json:"stop_codons"` AminoAcids []AminoAcid `json:"amino_acids"` -} -// Translate translates a codon sequence to an amino acid sequence -func Translate(sequence string, codonTable Table) (string, error) { - if codonTable.IsEmpty() { - return "", errEmptyCodonTable - } - if len(sequence) == 0 { - return "", errEmptySequenceString - } + TranslationMap map[string]string + StartCodonTable map[string]string + Choosers map[string]weightedRand.Chooser - var aminoAcids strings.Builder - var currentCodon strings.Builder - translationTable := codonTable.GenerateTranslationTable() - startCodonTable := codonTable.GenerateStartCodonTable() + Stats *Stats +} - startCodonReached := false - for _, letter := range sequence { - // add current nucleotide to currentCodon - currentCodon.WriteRune(letter) +// Copy returns a deep copy of the translation table. This is to prevent an unintended update of data used in another +// process, since the tables are generated at build time. +func (table *TranslationTable) Copy() *TranslationTable { + return &TranslationTable{ + StartCodons: table.StartCodons, + StopCodons: table.StopCodons, + AminoAcids: table.AminoAcids, - // if current nucleotide is the third in a codon translate to aminoAcid write to aminoAcids and reset currentCodon. - // use start codon table for the first codon only, erroring out if an invalid start codon is provided - if currentCodon.Len() == 3 { - if startCodonReached { - aminoAcids.WriteString(translationTable[strings.ToUpper(currentCodon.String())]) - } else { - aminoAcid, ok := startCodonTable[strings.ToUpper(currentCodon.String())] - if !ok { - return "", fmt.Errorf("start codon %q is not in start codon table %v", currentCodon.String(), startCodonTable) - } - aminoAcids.WriteString(aminoAcid) - startCodonReached = true - } + StartCodonTable: table.StartCodonTable, + TranslationMap: table.TranslationMap, + Choosers: table.Choosers, - // reset codon string builder for next codon. - currentCodon.Reset() - } + Stats: table.Stats, } - return aminoAcids.String(), nil } -// Optimize takes an amino acid sequence and codonTable and returns an optimized codon sequence. Takes an optional random seed as last argument. -func Optimize(aminoAcids string, codonTable Table, randomState ...int) (string, error) { +// GetWeightedAminoAcids returns the amino acids along with their associated codon weights +func (table *TranslationTable) GetWeightedAminoAcids() []AminoAcid { + return table.AminoAcids +} + +// OptimizeSequence will return a set of codons which can be used to encode the given amino acid sequence. 
The codons +// picked are weighted according to the computed translation table's weights +func (table *TranslationTable) OptimizeSequence(aminoAcids string, randomState ...int) (string, error) { // Finding any given aminoAcid is dependent upon it being capitalized, so // we do that here. aminoAcids = strings.ToUpper(aminoAcids) - if codonTable.IsEmpty() { - return "", errEmptyCodonTable - } if len(aminoAcids) == 0 { return "", errEmptyAminoAcidString } @@ -163,45 +156,149 @@ func Optimize(aminoAcids string, codonTable Table, randomState ...int) (string, } var codons strings.Builder - codonChooser, err := codonTable.Chooser() - if err != nil { - return "", err - } + codonChooser := table.Choosers for _, aminoAcid := range aminoAcids { chooser, ok := codonChooser[string(aminoAcid)] if !ok { return "", invalidAminoAcidError{aminoAcid} } + codons.WriteString(chooser.Pick().(string)) } + return codons.String(), nil } -// OptimizeTable weights each codon in a codon table according to input string codon frequency. -// This function actually mutates the codonTable struct itself. -func (table codonTable) OptimizeTable(sequence string) Table { +// UpdateWeights will update the translation table's codon pickers with the given amino acid codon weights +func (table *TranslationTable) UpdateWeights(aminoAcids []AminoAcid) error { + // regenerate a map of codons -> amino acid + + var updatedTranslationMap = make(map[string]string) + for _, aminoAcid := range table.AminoAcids { + for _, codon := range aminoAcid.Codons { + updatedTranslationMap[codon.Triplet] = aminoAcid.Letter + } + } + + table.TranslationMap = updatedTranslationMap + + // Update Chooser + updatedChoosers, err := newAminoAcidChoosers(table.AminoAcids) + if err != nil { + return err + } + + table.Choosers = updatedChoosers + table.AminoAcids = aminoAcids + + return nil +} + +// UpdateWeightsWithSequence will look at the coding regions in the given genbank data, and use those to generate new +// weights for the codons in the translation table. The next time a sequence is optimised, it will use those updated +// weights. +// +// This can be used to, for example, figure out which DNA sequence is needed to give the best yield of protein when +// trying to express a protein across different species +func (table *TranslationTable) UpdateWeightsWithSequence(data genbank.Genbank) error { + codingRegions, err := extractCodingRegion(data) + if err != nil { + return err + } + + table.Stats.GeneCount = len(codingRegions) + for _, sequence := range codingRegions { + table.Stats.StartCodonCount[sequence[:3]]++ + } + + if len(codingRegions) == 0 { + return errNoCodingRegions + } + + // weight our codon optimization table using the regions we collected from the genbank file above + newWeights := weightAminoAcids(strings.Join(codingRegions, ""), table.AminoAcids) + + return table.UpdateWeights(newWeights) +} + +// Translate will return an amino acid sequence which the given DNA will yield +func (table *TranslationTable) Translate(dnaSeq string) (string, error) { + if dnaSeq == "" { + return "", errEmptySequenceString + } + + var aminoAcids strings.Builder + var currentCodon strings.Builder + translationTable := table.TranslationMap + startCodonTable := table.StartCodonTable + + startCodonReached := false + for _, letter := range dnaSeq { + // add current nucleotide to currentCodon + currentCodon.WriteRune(letter) + + // if current nucleotide is the third in a codon translate to aminoAcid write to aminoAcids and reset currentCodon. 
+ // use start codon table for the first codon only, erroring out if an invalid start codon is provided + if currentCodon.Len() == 3 { + if startCodonReached { + aminoAcids.WriteString(translationTable[strings.ToUpper(currentCodon.String())]) + } else { + aminoAcid, ok := startCodonTable[strings.ToUpper(currentCodon.String())] + if !ok { + return "", fmt.Errorf("start codon %q is not in start codon table %v", currentCodon.String(), startCodonTable) + } + aminoAcids.WriteString(aminoAcid) + startCodonReached = true + } + + // reset codon string builder for next codon. + currentCodon.Reset() + } + } + return aminoAcids.String(), nil +} + +// weightAminoAcids weights each codon in a codon table according to input string codon frequency, adding weight to +// the given NCBI base codon table +func weightAminoAcids(sequence string, aminoAcids []AminoAcid) []AminoAcid { sequence = strings.ToUpper(sequence) codonFrequencyMap := getCodonFrequency(sequence) - for aminoAcidIndex, aminoAcid := range table.AminoAcids { + for aminoAcidIndex, aminoAcid := range aminoAcids { // apply weights to codonTable for codonIndex, codon := range aminoAcid.Codons { - table.AminoAcids[aminoAcidIndex].Codons[codonIndex].Weight = codonFrequencyMap[codon.Triplet] + aminoAcids[aminoAcidIndex].Codons[codonIndex].Weight = codonFrequencyMap[codon.Triplet] } } - return table + + return aminoAcids } -// GenerateStartCodonTable returns a mapping from the start codons of a Table to their associated amino acids. -// For our codonTable implementation, assumes that we always map to Met. -func (table codonTable) GenerateStartCodonTable() map[string]string { - result := make(map[string]string) - for _, codon := range table.StartCodons { - result[codon] = "M" +// extractCodingRegion loops through genbank data to find all CDS (coding sequences) +func extractCodingRegion(data genbank.Genbank) ([]string, error) { + codingRegions := []string{} + + // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder + for _, feature := range data.Features { + if feature.Type == "CDS" { + sequence, err := feature.GetSequence() + if err != nil { + return nil, err + } + + // Note: sometimes, genbank files will have annotated CDSs that are pseudo genes (not having triplet codons). + // This will shift the entire codon table, messing up the end results. To fix this, make sure to do a modulo + // check. + if len(sequence)%3 != 0 { + continue + } + + codingRegions = append(codingRegions, sequence) + } } - return result + return codingRegions, nil } // getCodonFrequency takes a DNA sequence and returns a hashmap of its codons and their frequencies. 
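The hunks above rework the package around a single TranslationTable value: construct one from an NCBI table number, optionally reweight it against the coding regions of a genbank record, then call OptimizeSequence and Translate on it. A minimal sketch of that workflow, assuming a local copy of the pUC19 genbank file at the path shown (the path and peptide are illustrative, not part of the patch):

package main

import (
	"fmt"
	"log"

	"github.com/TimothyStiles/poly/io/genbank"
	"github.com/TimothyStiles/poly/synthesis/codon"
)

func main() {
	// Start from NCBI translation table 11 (bacteria, archaea, plastids).
	table := codon.NewTranslationTable(11)

	// Reweight the table's codon choosers with the codon usage observed in the
	// record's CDS features; Stats picks up gene and start codon counts as a side effect.
	plasmid, err := genbank.Read("data/puc19.gbk") // assumed path
	if err != nil {
		log.Fatal(err)
	}
	if err := table.UpdateWeightsWithSequence(plasmid); err != nil {
		log.Fatal(err)
	}
	fmt.Println(table.Stats.GeneCount, table.Stats.StartCodonCount)

	// Encode a short peptide with the reweighted table (fixed seed for
	// reproducibility), then translate it back to check the round trip.
	peptide := "MASKGEELFTGVVPILVELDGDVNGHK*"
	dna, err := table.OptimizeSequence(peptide, 1)
	if err != nil {
		log.Fatal(err)
	}
	backTranslated, err := table.Translate(dna)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(backTranslated == peptide) // expected: true
}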
@@ -231,17 +328,13 @@ func getCodonFrequency(sequence string) map[string]int { return codonFrequencyHashMap } -func (table codonTable) IsEmpty() bool { - return len(table.StartCodons) == 0 && len(table.StopCodons) == 0 && len(table.AminoAcids) == 0 -} - -// Chooser is a codonTable method to convert a codon table to a chooser -func (table codonTable) Chooser() (map[string]weightedRand.Chooser, error) { +// newAminoAcidChoosers is a codonTable method to convert a codon table to a chooser +func newAminoAcidChoosers(aminoAcids []AminoAcid) (map[string]weightedRand.Chooser, error) { // This maps codon tables structure to weightRand.NewChooser structure codonChooser := make(map[string]weightedRand.Chooser) // iterate over every amino acid in the codonTable - for _, aminoAcid := range table.AminoAcids { + for _, aminoAcid := range aminoAcids { // create a list of codon choices for this specific amino acid codonChoices := make([]weightedRand.Choice, len(aminoAcid.Codons)) @@ -264,7 +357,7 @@ func (table codonTable) Chooser() (map[string]weightedRand.Chooser, error) { // add this chooser set to the codonChooser map under the name of the aminoAcid it represents. chooser, err := newChooserFn(codonChoices...) if err != nil { - return nil, fmt.Errorf("weightedRand.NewChooser() error: %s", err) + return nil, fmt.Errorf("weightedRand.NewChooser() error: %w", err) } codonChooser[aminoAcid.Letter] = *chooser @@ -272,29 +365,6 @@ func (table codonTable) Chooser() (map[string]weightedRand.Chooser, error) { return codonChooser, nil } -// GenerateTranslationTable generates a map of codons -> amino acid -func (table codonTable) GenerateTranslationTable() map[string]string { - var translationMap = make(map[string]string) - for _, aminoAcid := range table.AminoAcids { - for _, codon := range aminoAcid.Codons { - translationMap[codon.Triplet] = aminoAcid.Letter - } - } - return translationMap -} - -func (table codonTable) GetStartCodons() []string { - return table.StartCodons -} - -func (table codonTable) GetStopCodons() []string { - return table.StopCodons -} - -func (table codonTable) GetAminoAcids() []AminoAcid { - return table.AminoAcids -} - /****************************************************************************** Oct, 15, 2020 @@ -323,7 +393,7 @@ Tim ******************************************************************************/ // Function to generate default codon tables from NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi -func generateCodonTable(aminoAcids, starts string) codonTable { +func generateCodonTable(aminoAcids, starts string) *TranslationTable { base1 := "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" base2 := "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" base3 := "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" @@ -349,16 +419,48 @@ func generateCodonTable(aminoAcids, starts string) codonTable { for k, v := range aminoAcidMap { aminoAcidSlice = append(aminoAcidSlice, AminoAcid{string(k), v}) } - return codonTable{startCodons, stopCodons, aminoAcidSlice} + + // generate a map of codons -> amino acid + + var translationMap = make(map[string]string) + for _, aminoAcid := range aminoAcidSlice { + for _, codon := range aminoAcid.Codons { + translationMap[codon.Triplet] = aminoAcid.Letter + } + } + + // GenerateStartCodonTable returns a mapping from the start codons of a Table to their associated amino acids. + // For our codonTable implementation, assumes that we always map to Met. 
+ + startCodonsMap := make(map[string]string) + for _, codon := range startCodons { + startCodonsMap[codon] = "M" + } + + // This function is run at buildtime and failure here means we have an invalid codon table. + chooser, err := newAminoAcidChoosers(aminoAcidSlice) + if err != nil { + panic(fmt.Errorf("tried to generate an invalid codon table %w", err)) + } + + return &TranslationTable{ + StartCodons: startCodons, + StopCodons: stopCodons, + AminoAcids: aminoAcidSlice, + TranslationMap: translationMap, + StartCodonTable: startCodonsMap, + Choosers: chooser, + Stats: NewStats(), + } } -// GetCodonTable takes the index of desired NCBI codon table and returns it. -func GetCodonTable(index int) Table { - return defaultCodonTablesByNumber[index] +// NewTranslationTable takes the index of desired NCBI codon table and returns it. +func NewTranslationTable(index int) *TranslationTable { + return translationTablesByNumber[index].Copy() } -// defaultCodonTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices. -var defaultCodonTablesByNumber = map[int]codonTable{ +// translationTablesByNumber stores all codon tables published by NCBI https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi using numbered indices. +var translationTablesByNumber = map[int]*TranslationTable{ 1: generateCodonTable("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M------**--*----M---------------M----------------------------"), 2: generateCodonTable("FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "----------**--------------------MMMM----------**---M------------"), 3: generateCodonTable("FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------**----------------------MM---------------M------------"), @@ -442,21 +544,21 @@ Keoni ******************************************************************************/ // ParseCodonJSON parses a codonTable JSON file. -func ParseCodonJSON(file []byte) Table { - var codonTable codonTable +func ParseCodonJSON(file []byte) *TranslationTable { + var codonTable TranslationTable _ = json.Unmarshal(file, &codonTable) - return codonTable + return &codonTable } // ReadCodonJSON reads a codonTable JSON file. -func ReadCodonJSON(path string) Table { +func ReadCodonJSON(path string) *TranslationTable { file, _ := os.ReadFile(path) codonTable := ParseCodonJSON(file) return codonTable } // WriteCodonJSON writes a codonTable struct out to JSON. -func WriteCodonJSON(codonTable Table, path string) { +func WriteCodonJSON(codonTable *TranslationTable, path string) { file, _ := json.MarshalIndent(codonTable, "", " ") _ = os.WriteFile(path, file, 0644) } @@ -492,27 +594,26 @@ Keoni // CompromiseCodonTable takes 2 CodonTables and makes a new codonTable // that is an equal compromise between the two tables. 
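// For illustration, assuming two already-weighted tables tableA and tableB:
//
//	merged, err := CompromiseCodonTable(tableA, tableB, 0.1)
//
// cutOff is interpreted as a fraction between 0 and 1 (values outside that
// range return an error below), and the merged table keeps the start and stop
// codons of the first table.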
-func CompromiseCodonTable(firstCodonTable, secondCodonTable Table, cutOff float64) (Table, error) { - // Initialize output codonTable, c - var c codonTable +func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, cutOff float64) (*TranslationTable, error) { + // Copy first table to base our merge on + // + // this take start and stop strings from first table + // and use them as start + stops in final codonTable + mergedTable := firstCodonTable.Copy() + // Check if cutOff is too high or low (this is converted to a percent) if cutOff < 0 { - return c, errors.New("cut off too low, cannot be less than 0") + return mergedTable, errors.New("cut off too low, cannot be less than 0") } if cutOff > 1 { - return c, errors.New("cut off too high, cannot be greater than 1") + return mergedTable, errors.New("cut off too high, cannot be greater than 1") } - // Take start and stop strings from first table - // and use them as start + stops in final codonTable - c.StartCodons = firstCodonTable.GetStartCodons() - c.StopCodons = firstCodonTable.GetStopCodons() - // Initialize the finalAminoAcid list for the output codonTable var finalAminoAcids []AminoAcid // Loop over all AminoAcids represented in the first codonTable - for _, firstAa := range firstCodonTable.GetAminoAcids() { + for _, firstAa := range firstCodonTable.AminoAcids { var firstTriplets []string var firstWeights []int var firstTotal int @@ -525,7 +626,7 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable Table, cutOff float6 firstTriplets = append(firstTriplets, firstCodon.Triplet) firstWeights = append(firstWeights, firstCodon.Weight) firstTotal = firstTotal + firstCodon.Weight - for _, secondAa := range secondCodonTable.GetAminoAcids() { + for _, secondAa := range secondCodonTable.AminoAcids { if secondAa.Letter == firstAa.Letter { for _, secondCodon := range secondAa.Codons { // For each codon from firstCodonTable, get the @@ -568,19 +669,24 @@ func CompromiseCodonTable(firstCodonTable, secondCodonTable Table, cutOff float6 // Append list of Codons to finalAminoAcids finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons}) } - c.AminoAcids = finalAminoAcids - return c, nil + + err := mergedTable.UpdateWeights(finalAminoAcids) + if err != nil { + return nil, err + } + + return mergedTable, nil } // AddCodonTable takes 2 CodonTables and adds them together to create // a new codonTable. 
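// For illustration, assuming the same two weighted tables as above: with the
// summation below, a codon carrying weight 3 in tableA and weight 5 in tableB
// ends up with weight 8 in the table returned by AddCodonTable(tableA, tableB);
// start and stop codons again come from the first argument via Copy.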
-func AddCodonTable(firstCodonTable, secondCodonTable Table) Table { +func AddCodonTable(firstCodonTable, secondCodonTable *TranslationTable) (*TranslationTable, error) { // Add up codons var finalAminoAcids []AminoAcid - for _, firstAa := range firstCodonTable.GetAminoAcids() { + for _, firstAa := range firstCodonTable.AminoAcids { var finalCodons []Codon for _, firstCodon := range firstAa.Codons { - for _, secondAa := range secondCodonTable.GetAminoAcids() { + for _, secondAa := range secondCodonTable.AminoAcids { for _, secondCodon := range secondAa.Codons { if firstCodon.Triplet == secondCodon.Triplet { finalCodons = append(finalCodons, Codon{firstCodon.Triplet, firstCodon.Weight + secondCodon.Weight}) @@ -591,9 +697,12 @@ func AddCodonTable(firstCodonTable, secondCodonTable Table) Table { finalAminoAcids = append(finalAminoAcids, AminoAcid{firstAa.Letter, finalCodons}) } - return codonTable{ - StartCodons: firstCodonTable.GetStartCodons(), - StopCodons: firstCodonTable.GetStopCodons(), - AminoAcids: finalAminoAcids, + mergedTable := firstCodonTable.Copy() + + err := mergedTable.UpdateWeights(finalAminoAcids) + if err != nil { + return nil, err } + + return mergedTable, nil } diff --git a/synthesis/codon/codon_test.go b/synthesis/codon/codon_test.go index c5b846253..c9607cd0f 100644 --- a/synthesis/codon/codon_test.go +++ b/synthesis/codon/codon_test.go @@ -16,7 +16,7 @@ func TestTranslation(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation { + if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslation has failed. 
Translate has returned %q, want %q", got, gfpTranslation) } } @@ -27,7 +27,7 @@ func TestTranslationAlwaysMapsStartCodonToMet(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "TTGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation { + if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslation has failed. Translate has returned %q, want %q", got, gfpTranslation) } } @@ -35,23 +35,14 @@ func TestTranslationAlwaysMapsStartCodonToMet(t *testing.T) { func TestTranslationErrorsOnIncorrectStartCodon(t *testing.T) { badSequence := "GGG" - if _, gotErr := Translate(badSequence, GetCodonTable(11)); gotErr == nil { + if _, gotErr := NewTranslationTable(11).Translate(badSequence); gotErr == nil { t.Errorf("Translation should return an error if given an incorrect start codon") } } -func TestTranslationErrorsOnEmptyCodonTable(t *testing.T) { - emtpyCodonTable := codonTable{} - _, err := Translate("A", emtpyCodonTable) - - if err != errEmptyCodonTable { - t.Error("Translation should return an error if given an empty codon table") - } -} - func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) { - nonEmptyCodonTable := GetCodonTable(1) - _, err := Translate("", nonEmptyCodonTable) + nonEmptyCodonTable := NewTranslationTable(1) + _, err := nonEmptyCodonTable.Translate("") if err != errEmptySequenceString { t.Error("Translation should return an error if given an empty sequence string") @@ -61,7 +52,7 @@ func TestTranslationErrorsOnEmptyAminoAcidString(t *testing.T) { func TestTranslationMixedCase(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := 
"atggctagcaaaggagaagaacttttcactggagttgtcccaaTTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - if got, _ := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation { + if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslationMixedCase has failed. Translate has returned %q, want %q", got, gfpTranslation) } } @@ -69,7 +60,7 @@ func TestTranslationMixedCase(t *testing.T) { func TestTranslationLowerCase(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "atggctagcaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgatgttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgctacatacggaaagcttacccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactactttctcttatggtgttcaatgcttttcccgttatccggatcatatgaaacggcatgactttttcaagagtgccatgcccgaaggttatgtacaggaacgcactatatctttcaaagatgacgggaactacaagacgcgtgctgaagtcaagtttgaaggtgatacccttgttaatcgtatcgagttaaaaggtattgattttaaagaagatggaaacattctcggacacaaactcgagtacaactataactcacacaatgtatacatcacggcagacaaacaaaagaatggaatcaaagctaacttcaaaattcgccacaacattgaagatggatccgttcaactagcagaccattatcaacaaaatactccaattggcgatggccctgtccttttaccagacaaccattacctgtcgacacaatctgccctttcgaaagatcccaacgaaaagcgtgaccacatggtccttcttgagtttgtaactgctgctgggattacacatggcatggatgagctctacaaataa" - if got, _ := Translate(gfpDnaSequence, GetCodonTable(11)); got != gfpTranslation { + if got, _ := NewTranslationTable(11).Translate(gfpDnaSequence); got != gfpTranslation { t.Errorf("TestTranslationLowerCase has failed. 
Translate has returned %q, want %q", got, gfpTranslation) } } @@ -78,27 +69,17 @@ func TestOptimize(t *testing.T) { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := GetCodonTable(11) - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + table := NewTranslationTable(11) + err := table.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + codonTable := NewTranslationTable(11) - optimizedSequence, _ := Optimize(gfpTranslation, optimizationTable) - optimizedSequenceTranslation, _ := Translate(optimizedSequence, optimizationTable) + optimizedSequence, _ := table.OptimizeSequence(gfpTranslation) + optimizedSequenceTranslation, _ := codonTable.Translate(optimizedSequence) if optimizedSequenceTranslation != gfpTranslation { t.Errorf("TestOptimize has failed. Translate has returned %q, want %q", optimizedSequenceTranslation, gfpTranslation) @@ -108,27 +89,19 @@ func TestOptimize(t *testing.T) { func TestOptimizeSameSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" var sequence, _ = genbank.Read("../../data/puc19.gbk") - var codonTable = GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) + } + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() - - var optimizationTable = codonTable.OptimizeTable(codingRegions) randomSeed := 10 - optimizedSequence, _ := Optimize(gfpTranslation, optimizationTable, randomSeed) - otherOptimizedSequence, _ := Optimize(gfpTranslation, optimizationTable, randomSeed) + optimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation, randomSeed) + otherOptimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation, randomSeed) if optimizedSequence != otherOptimizedSequence { t.Error("Optimized sequence with the same random seed are not the same") @@ -138,44 +111,23 @@ func 
TestOptimizeSameSeed(t *testing.T) { func TestOptimizeDifferentSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" var sequence, _ = genbank.Read("../../data/puc19.gbk") - var codonTable = GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() - - var optimizationTable = codonTable.OptimizeTable(codingRegions) - - optimizedSequence, _ := Optimize(gfpTranslation, optimizationTable) - otherOptimizedSequence, _ := Optimize(gfpTranslation, optimizationTable) + optimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation) + otherOptimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation) if optimizedSequence == otherOptimizedSequence { t.Error("Optimized sequence with different random seed have the same result") } } -func TestOptimizeErrorsOnEmptyCodonTable(t *testing.T) { - emtpyCodonTable := codonTable{} - _, err := Optimize("A", emtpyCodonTable) - - if err != errEmptyCodonTable { - t.Error("Optimize should return an error if given an empty codon table") - } -} - func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) { - nonEmptyCodonTable := GetCodonTable(1) - _, err := Optimize("", nonEmptyCodonTable) + nonEmptyCodonTable := NewTranslationTable(1) + _, err := nonEmptyCodonTable.OptimizeSequence("") if err != errEmptyAminoAcidString { t.Error("Optimize should return an error if given an empty amino acid string") @@ -183,32 +135,14 @@ func TestOptimizeErrorsOnEmptyAminoAcidString(t *testing.T) { } func TestOptimizeErrorsOnInvalidAminoAcid(t *testing.T) { aminoAcids := "TOP" - table := GetCodonTable(1) // does not contain 'O' + table := NewTranslationTable(1) // does not contain 'O' - _, optimizeErr := Optimize(aminoAcids, table) + _, optimizeErr := table.OptimizeSequence(aminoAcids) assert.EqualError(t, optimizeErr, invalidAminoAcidError{'O'}.Error()) } -func TestOptimizeErrorsOnBrokenChooser(t *testing.T) { - gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" - - chooserErr := errors.New("chooser rigged to fail") - - codonTable := &mockTable{ - ChooserFn: func() (map[string]weightedRand.Chooser, error) { - return nil, chooserErr - }, - IsEmptyFn: func() bool { - return false - }, - } - - _, err := Optimize(gfpTranslation, codonTable) - assert.EqualError(t, err, chooserErr.Error()) -} - func TestGetCodonFrequency(t *testing.T) { - translationTable := GetCodonTable(11).GenerateTranslationTable() + translationTable := NewTranslationTable(11).TranslationMap var codons strings.Builder @@ 
-251,21 +185,6 @@ func TestGetCodonFrequency(t *testing.T) { } } -func TestChooserError(t *testing.T) { - codonTable := GetCodonTable(11) - - oldChooserFn := newChooserFn - newChooserFn = func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { - return nil, errors.New("new chooser rigged to fail") - } - defer func() { - newChooserFn = oldChooserFn - }() - - _, err := codonTable.Chooser() - assert.EqualError(t, err, "weightedRand.NewChooser() error: new chooser rigged to fail") -} - /****************************************************************************** JSON related tests begin here. @@ -294,46 +213,23 @@ Codon Compromise + Add related tests begin here. */ func TestCompromiseCodonTable(t *testing.T) { sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := GetCodonTable(11) - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder + // weight our codon optimization table using the regions we collected from the genbank file above - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) - sequence2, _ := genbank.Read("../../data/phix174.gb") - codonTable2 := GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder2 strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence2.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder2.WriteString(sequence) - } + optimizationTable2 := NewTranslationTable(11) + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) + if err != nil { + t.Error(err) } - // get the concatenated sequence string of the coding regions - codingRegions2 := codingRegionsBuilder2.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable2 := codonTable2.OptimizeTable(codingRegions2) - - _, err := CompromiseCodonTable(optimizationTable, optimizationTable2, -1.0) // Fails too low + _, err = CompromiseCodonTable(optimizationTable, optimizationTable2, -1.0) // Fails too low if err == nil { t.Errorf("Compromise table should fail on -1.0") } @@ -341,20 +237,53 @@ func TestCompromiseCodonTable(t *testing.T) { if err == nil { t.Errorf("Compromise table should fail on 10.0") } -} -type mockTable struct { - codonTable - ChooserFn func() (map[string]weightedRand.Chooser, error) - IsEmptyFn func() bool -} + // replace chooser fn with test one + newChooserFn = func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { + return nil, errors.New("new chooser rigged to fail") + } -func (t *mockTable) Chooser() (map[string]weightedRand.Chooser, error) { - return t.ChooserFn() + defer func() { + newChooserFn = 
weightedRand.NewChooser + }() + + _, err = CompromiseCodonTable(optimizationTable, optimizationTable2, 0.1) + if err == nil { + t.Errorf("Compromise table should fail when new chooser func rigged") + } } -func (t *mockTable) IsEmpty() bool { - return t.IsEmptyFn() +func TestAddCodonTable(t *testing.T) { + sequence, _ := genbank.Read("../../data/puc19.gbk") + + // weight our codon optimization table using the regions we collected from the genbank file above + + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) + } + + sequence2, _ := genbank.Read("../../data/phix174.gb") + optimizationTable2 := NewTranslationTable(11) + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) + if err != nil { + t.Error(err) + } + + // replace chooser fn with test one + newChooserFn = func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { + return nil, errors.New("new chooser rigged to fail") + } + + defer func() { + newChooserFn = weightedRand.NewChooser + }() + + _, err = AddCodonTable(optimizationTable, optimizationTable2) + if err == nil { + t.Errorf("Compromise table should fail when new chooser func rigged") + } } func TestCapitalizationRegression(t *testing.T) { @@ -362,29 +291,246 @@ func TestCapitalizationRegression(t *testing.T) { gfpTranslation := "MaSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := GetCodonTable(11) - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + t.Error(err) + } + + optimizedSequence, _ := optimizationTable.OptimizeSequence(gfpTranslation, 1) + optimizedSequenceTranslation, _ := optimizationTable.Translate(optimizedSequence) - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } + if optimizedSequenceTranslation != strings.ToUpper(gfpTranslation) { + t.Errorf("TestOptimize has failed. 
Translate has returned %q, want %q", optimizedSequenceTranslation, gfpTranslation) } +} + +func TestOptimizeSequence(t *testing.T) { + t.Parallel() + + var ( + gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" + optimisedGFP = "ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA" + puc19 = func() genbank.Genbank { + seq, err := genbank.Read("../../data/puc19.gbk") + if err != nil { + t.Fatal(err) + } + + return seq + }() + ) + + tests := []struct { + name string + + sequenceToOptimise string + updateWeightsWith genbank.Genbank + wantOptimised string + + wantUpdateWeightsErr error + wantOptimiseErr error + }{ + { + name: "ok", + + sequenceToOptimise: gfpTranslation, + updateWeightsWith: puc19, + wantOptimised: optimisedGFP, + + wantUpdateWeightsErr: nil, + wantOptimiseErr: nil, + }, + { + name: "giving no sequence to optimise", - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() + sequenceToOptimise: "", + updateWeightsWith: puc19, + wantOptimised: "", - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + wantUpdateWeightsErr: nil, + wantOptimiseErr: errEmptyAminoAcidString, + }, + { + name: "updating weights with a sequence with no CDS", - optimizedSequence, _ := Optimize(gfpTranslation, optimizationTable) - optimizedSequenceTranslation, _ := Translate(optimizedSequence, optimizationTable) + sequenceToOptimise: "", + updateWeightsWith: genbank.Genbank{}, + wantOptimised: "", - if optimizedSequenceTranslation != strings.ToUpper(gfpTranslation) { - t.Errorf("TestOptimize has failed. 
Translate has returned %q, want %q", optimizedSequenceTranslation, gfpTranslation) + wantUpdateWeightsErr: errNoCodingRegions, + wantOptimiseErr: errEmptyAminoAcidString, + }, + } + + for _, tt := range tests { + var tt = tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + optimizationTable := NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(tt.updateWeightsWith) + if !errors.Is(err, tt.wantUpdateWeightsErr) { + t.Errorf("got %v, want %v", err, tt.wantUpdateWeightsErr) + } + + got, err := optimizationTable.OptimizeSequence(tt.sequenceToOptimise, 1) + if !errors.Is(err, tt.wantOptimiseErr) { + t.Errorf("got %v, want %v", err, tt.wantOptimiseErr) + } + + if !cmp.Equal(got, tt.wantOptimised) { + t.Errorf("got and tt.wantOptimised didn't match %s", cmp.Diff(got, tt.wantOptimised)) + } + }) + } +} + +func TestNewAminoAcidChooser(t *testing.T) { + var ( + mockError = errors.New("new chooser rigged to fail") + ) + + tests := []struct { + name string + + aminoAcids []AminoAcid + + chooserFn func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) + + wantErr error + }{ + { + name: "ok", + + aminoAcids: []AminoAcid{ + { + Letter: "R", + Codons: []Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + }, + }, + }, + + chooserFn: weightedRand.NewChooser, + + wantErr: nil, + }, + { + name: "chooser fn constructor error", + + aminoAcids: []AminoAcid{ + { + Letter: "R", + Codons: []Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + }, + }, + }, + + chooserFn: func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { + return nil, mockError + }, + + wantErr: mockError, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // replace chooser fn with test one + newChooserFn = tt.chooserFn + + defer func() { + newChooserFn = weightedRand.NewChooser + }() + + _, err := newAminoAcidChoosers(tt.aminoAcids) + if !errors.Is(err, tt.wantErr) { + t.Errorf("got %v, want %v", err, tt.wantErr) + } + }) + } +} + +func TestUpdateWeights(t *testing.T) { + var ( + mockError = errors.New("new chooser rigged to fail") + ) + + tests := []struct { + name string + + aminoAcids []AminoAcid + + chooserFn func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) + + wantErr error + }{ + { + name: "ok", + + aminoAcids: []AminoAcid{ + { + Letter: "R", + Codons: []Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + }, + }, + }, + + chooserFn: weightedRand.NewChooser, + + wantErr: nil, + }, + { + name: "chooser fn constructor error", + + aminoAcids: []AminoAcid{ + { + Letter: "R", + Codons: []Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + }, + }, + }, + + chooserFn: func(choices ...weightedRand.Choice) (*weightedRand.Chooser, error) { + return nil, mockError + }, + + wantErr: mockError, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // replace chooser fn with test one + newChooserFn = tt.chooserFn + + defer func() { + newChooserFn = weightedRand.NewChooser + }() + + optimizationTable := NewTranslationTable(11) + + err := optimizationTable.UpdateWeights(tt.aminoAcids) + if !errors.Is(err, tt.wantErr) { + t.Errorf("got %v, want %v", err, tt.wantErr) + } + }) } } diff --git a/synthesis/codon/example_test.go b/synthesis/codon/example_test.go index ce5f90612..0374a8f2a 100644 --- a/synthesis/codon/example_test.go +++ b/synthesis/codon/example_test.go @@ -3,69 +3,89 @@ package codon_test import ( "fmt" "os" - "strings" "github.com/TimothyStiles/poly/io/genbank" "github.com/TimothyStiles/poly/synthesis/codon" ) -func 
ExampleTranslate() { +func ExampleTranslationTable_Translate() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA" - testTranslation, _ := codon.Translate(gfpDnaSequence, codon.GetCodonTable(11)) // need to specify which codons map to which amino acids per NCBI table + testTranslation, _ := codon.NewTranslationTable(11).Translate(gfpDnaSequence) // need to specify which codons map to which amino acids per NCBI table fmt.Println(gfpTranslation == testTranslation) // output: true } -func ExampleOptimize() { +func ExampleTranslationTable_UpdateWeights() { gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" - - sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := codon.GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // initiate genes - genes := 0 - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - // Note: sometimes, genbank files will have annotated CDSs that are pseudo genes (not having triplet codons). - // This will shift the entire codon table, messing up the end results. To fix this, make sure to do a modulo - // check. - if len(sequence)%3 == 0 { - codingRegionsBuilder.WriteString(sequence) - - // Another good double check is to count genes, then count stop codons. 
- genes++ - } - } + sequenceWithCustomWeights := "ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA" + + table := codon.NewTranslationTable(11) + + // this example is using custom weights for different codons for Arginine. Use this if you would rather use your own + // codon weights, they can also be computed for you with `UpdateWeightsWithSequence`. + + err := table.UpdateWeights([]codon.AminoAcid{ + { + Letter: "R", + Codons: []codon.Codon{ + { + Triplet: "CGU", + Weight: 1, + }, + { + Triplet: "CGA", + Weight: 2, + }, + { + Triplet: "CGG", + Weight: 4, + }, + { + Triplet: "AGA", + Weight: 6, + }, + { + Triplet: "AGG", + Weight: 2, + }, + }, + }, + }) + if err != nil { + fmt.Println("Could not update weights in example") } - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() + optimizedSequence, _ := table.OptimizeSequence(gfpTranslation, 1) - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + fmt.Println(optimizedSequence == sequenceWithCustomWeights) + // output: true +} + +func ExampleTranslationTable_OptimizeSequence() { + gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" + + sequence, _ := genbank.Read("../../data/puc19.gbk") + codonTable := codon.NewTranslationTable(11) + _ = codonTable.UpdateWeightsWithSequence(sequence) // Here, we double check if the number of genes is equal to the number of stop codons stopCodonCount := 0 - for _, aa := range optimizationTable.GetAminoAcids() { + for _, aa := range codonTable.AminoAcids { if aa.Letter == "*" { for _, codon := range aa.Codons { stopCodonCount = stopCodonCount + codon.Weight } } } - if stopCodonCount != genes { + + if stopCodonCount != codonTable.Stats.GeneCount { fmt.Println("Stop codons don't equal number of genes!") } - optimizedSequence, _ := codon.Optimize(gfpTranslation, optimizationTable) - optimizedSequenceTranslation, _ := codon.Translate(optimizedSequence, optimizationTable) + optimizedSequence, _ := codonTable.OptimizeSequence(gfpTranslation) + optimizedSequenceTranslation, _ := codonTable.Translate(optimizedSequence) fmt.Println(optimizedSequenceTranslation == gfpTranslation) // output: true @@ -74,7 +94,7 @@ func ExampleOptimize() { func ExampleReadCodonJSON() { codontable := codon.ReadCodonJSON("../../data/bsub_codon_test.json") - fmt.Println(codontable.GetAminoAcids()[0].Codons[0].Weight) + fmt.Println(codontable.GetWeightedAminoAcids()[0].Codons[0].Weight) //output: 28327 } @@ -82,7 +102,7 @@ func ExampleParseCodonJSON() { file, _ := os.ReadFile("../../data/bsub_codon_test.json") 
codontable := codon.ParseCodonJSON(file) - fmt.Println(codontable.GetAminoAcids()[0].Codons[0].Weight) + fmt.Println(codontable.GetWeightedAminoAcids()[0].Codons[0].Weight) //output: 28327 } @@ -94,52 +114,29 @@ func ExampleWriteCodonJSON() { // cleaning up test data os.Remove("../../data/codon_test.json") - fmt.Println(testCodonTable.GetAminoAcids()[0].Codons[0].Weight) + fmt.Println(testCodonTable.GetWeightedAminoAcids()[0].Codons[0].Weight) //output: 28327 } func ExampleCompromiseCodonTable() { sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := codon.GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } - } - - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + optimizationTable := codon.NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + panic(fmt.Errorf("got unexpected error in an example: %w", err)) + } sequence2, _ := genbank.Read("../../data/phix174.gb") - codonTable2 := codon.GetCodonTable(11) - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder2 strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence2.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder2.WriteString(sequence) - } + optimizationTable2 := codon.NewTranslationTable(11) + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) + if err != nil { + panic(fmt.Errorf("got unexpected error in an example: %w", err)) } - // get the concatenated sequence string of the coding regions - codingRegions2 := codingRegionsBuilder2.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable2 := codonTable2.OptimizeTable(codingRegions2) - finalTable, _ := codon.CompromiseCodonTable(optimizationTable, optimizationTable2, 0.1) - for _, aa := range finalTable.GetAminoAcids() { + for _, aa := range finalTable.GetWeightedAminoAcids() { for _, codon := range aa.Codons { if codon.Triplet == "TAA" { fmt.Println(codon.Weight) @@ -151,47 +148,27 @@ func ExampleCompromiseCodonTable() { func ExampleAddCodonTable() { sequence, _ := genbank.Read("../../data/puc19.gbk") - codonTable := codon.GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder.WriteString(sequence) - } - } - - // get the concatenated sequence string of the coding regions - codingRegions := codingRegionsBuilder.String() // weight our codon 
optimization table using the regions we collected from the genbank file above - optimizationTable := codonTable.OptimizeTable(codingRegions) + optimizationTable := codon.NewTranslationTable(11) + err := optimizationTable.UpdateWeightsWithSequence(sequence) + if err != nil { + panic(fmt.Errorf("got unexpected error in an example: %w", err)) + } sequence2, _ := genbank.Read("../../data/phix174.gb") - codonTable2 := codon.GetCodonTable(11) - - // a string builder to build a single concatenated string of all coding regions - var codingRegionsBuilder2 strings.Builder - - // iterate through the features of the genbank file and if the feature is a coding region, append the sequence to the string builder - for _, feature := range sequence2.Features { - if feature.Type == "CDS" { - sequence, _ := feature.GetSequence() - codingRegionsBuilder2.WriteString(sequence) - } + optimizationTable2 := codon.NewTranslationTable(11) + err = optimizationTable2.UpdateWeightsWithSequence(sequence2) + if err != nil { + panic(fmt.Errorf("got unexpected error in an example: %w", err)) } - // get the concatenated sequence string of the coding regions - codingRegions2 := codingRegionsBuilder2.String() - - // weight our codon optimization table using the regions we collected from the genbank file above - optimizationTable2 := codonTable2.OptimizeTable(codingRegions2) + finalTable, err := codon.AddCodonTable(optimizationTable, optimizationTable2) + if err != nil { + panic(fmt.Errorf("got error in adding codon table example: %w", err)) + } - finalTable := codon.AddCodonTable(optimizationTable, optimizationTable2) - for _, aa := range finalTable.GetAminoAcids() { + for _, aa := range finalTable.AminoAcids { for _, codon := range aa.Codons { if codon.Triplet == "GGC" { fmt.Println(codon.Weight) diff --git a/synthesis/fix/synthesis.go b/synthesis/fix/synthesis.go index 4aa50767e..ad4e16631 100644 --- a/synthesis/fix/synthesis.go +++ b/synthesis/fix/synthesis.go @@ -238,7 +238,7 @@ func Cds(sequence string, codontable codon.Table, problematicSequenceFuncs []fun // Build historical maps and full amino acid weights aminoAcidWeightTable := make(map[string]int) - for _, aminoAcid := range codontable.GetAminoAcids() { + for _, aminoAcid := range codontable.GetWeightedAminoAcids() { var aminoAcidTotal int for _, codon := range aminoAcid.Codons { // Get the total weights of all the codons for a given amino acid. @@ -271,7 +271,7 @@ func Cds(sequence string, codontable codon.Table, problematicSequenceFuncs []fun // Build weight map. The weight map gives the relative normalized weight of // any given codon triplet. 
- for _, aminoAcid := range codontable.GetAminoAcids() { + for _, aminoAcid := range codontable.GetWeightedAminoAcids() { for _, codon := range aminoAcid.Codons { codonWeightRatio := float64(codon.Weight) / float64(aminoAcidWeightTable[aminoAcid.Letter]) normalizedCodonWeight := 100 * codonWeightRatio diff --git a/synthesis/fix/synthesis_test.go b/synthesis/fix/synthesis_test.go index 9e274d95d..7d6ceb99a 100644 --- a/synthesis/fix/synthesis_test.go +++ b/synthesis/fix/synthesis_test.go @@ -40,7 +40,7 @@ func BenchmarkCds(b *testing.B) { var functions []func(string, chan DnaSuggestion, *sync.WaitGroup) functions = append(functions, RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site.")) for i := 0; i < b.N; i++ { - seq, _ := codon.Optimize(phusion, codonTable) + seq, _ := codonTable.OptimizeSequence(phusion) optimizedSeq, changes, err := Cds(seq, codonTable, functions) if err != nil { b.Errorf("Failed to fix phusion with error: %s", err) @@ -76,7 +76,7 @@ func TestCds(t *testing.T) { phusion := "MGHHHHHHHHHHSSGILDVDYITEEGKPVIRLFKKENGKFKIEHDRTFRPYIYALLRDDSKIEEVKKITGERHGKIVRIVDVEKVEKKFLGKPITVWKLYLEHPQDVPTIREKVREHPAVVDIFEYDIPFAKRYLIDKGLIPMEGEEELKILAFDIETLYHEGEEFGKGPIIMISYADENEAKVITWKNIDLPYVEVVSSEREMIKRFLRIIREKDPDIIVTYNGDSFDFPYLAKRAEKLGIKLTIGRDGSEPKMQRIGDMTAVEVKGRIHFDLYHVITRTINLPTYTLEAVYEAIFGKPKEKVYADEIAKAWESGENLERVAKYSMEDAKATYELGKEFLPMEIQLSRLVGQPLWDVSRSSTGNLVEWFLLRKAYERNEVAPNKPSEEEYQRRLRESYTGGFVKEPEKGLWENIVYLDFRALYPSIIITHNVSPDTLNLEGCKNYDIAPQVGHKFCKDIPGFIPSLLGHLLEERQKIKTKMKETQDPIEKILLDYRQKAIKLLANSFYGYYGYAKARWYCKECAESVTAWGRKYIELVWKELEEKFGFKVLYIDTDGLYATIPGGESEEIKKKALEFVKYINSKLPGLLELEYEGFYKRGFFVTKKRYAVIDEEGKVITRGLEIVRRDWSEIAKETQARVLETILKHGDVEEAVRIVKEVIQKLANYEIPPEKLAIYEQITRPLHEYKAIGPHVAVAKKLAAKGVKIKPGMVIGYIVLRGDGPISNRAILAEEYDPKKHKYDAEYYIENQVLPAVLRILEGFGYRKEDLRYQKTRQVGLTSWLNIKKSGTGGGGATVKFKYKGEEKEVDISKIKKVWRVGKMISFTYDEGGGKTGRGAVSEKDAPKELLQMLEKQKK*" var functions []func(string, chan DnaSuggestion, *sync.WaitGroup) functions = append(functions, RemoveSequence([]string{"GAAGAC", "GGTCTC", "GCGATG", "CGTCTC", "GCTCTTC", "CACCTGC"}, "TypeIIS restriction enzyme site.")) - seq, _ := codon.Optimize(phusion, codonTable) + seq, _ := codonTable.OptimizeSequence(phusion) optimizedSeq, _, err := Cds(seq, codonTable, functions) if err != nil { t.Errorf("Failed with error: %s", err) From 489cf9747618835d0aa9eea7f751d3ab8ab7ab50 Mon Sep 17 00:00:00 2001 From: Koeng101 Date: Wed, 15 Nov 2023 11:33:08 -0800 Subject: [PATCH 4/4] Add FragmentWithOverhangs (#387) * Add FragmentWithOverhangs * Fragment naming updated --------- Co-authored-by: Tim --- CHANGELOG.md | 4 +++ synthesis/fragment/fragment.go | 40 +++++++++++++++++++++-------- synthesis/fragment/fragment_test.go | 10 ++++++++ 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b564df7b..b5337e311 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Alternative start codons can now be used in the `synthesis/codon` DNA -> protein translation package (#305) - Added a parser and writer for the `pileup` sequence alignment format (#329) - Added statistics to the `synthesis/codon` package (keeping track of the observed start codon occurrences in a translation table) (#350) +- Added option to fragmenter to fragment with only certain overhangs (#387) + + + ### Fixed - `fastq` parser no longer becomes de-aligned when reading (#325) diff --git 
a/synthesis/fragment/fragment.go b/synthesis/fragment/fragment.go index 23a59b275..ab01d06ec 100644 --- a/synthesis/fragment/fragment.go +++ b/synthesis/fragment/fragment.go @@ -98,11 +98,11 @@ func NextOverhang(currentOverhangs []string) string { } // optimizeOverhangIteration takes in a sequence and optimally fragments it. -func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, existingOverhangs []string) ([]string, float64, error) { +func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragmentSize int, existingFragments []string, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) { // If the sequence is smaller than maxFragment size, stop iteration. if len(sequence) < maxFragmentSize { existingFragments = append(existingFragments, sequence) - return existingFragments, SetEfficiency(existingOverhangs), nil + return existingFragments, SetEfficiency(excludeOverhangs), nil } // Make sure minFragmentSize > maxFragmentSize @@ -136,6 +136,7 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment var bestOverhangEfficiency float64 var bestOverhangPosition int var alreadyExists bool + var buildAvailable bool for overhangOffset := 0; overhangOffset <= maxFragmentSize-minFragmentSize; overhangOffset++ { // We go from max -> min, so we can maximize the size of our fragments overhangPosition := maxFragmentSize - overhangOffset @@ -143,16 +144,27 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment // Make sure overhang isn't already in set alreadyExists = false - for _, existingOverhang := range existingOverhangs { - if existingOverhang == overhangToTest || transform.ReverseComplement(existingOverhang) == overhangToTest { + for _, excludeOverhang := range excludeOverhangs { + if excludeOverhang == overhangToTest || transform.ReverseComplement(excludeOverhang) == overhangToTest { alreadyExists = true } } - if !alreadyExists { + // Make sure overhang is in set of includeOverhangs. If includeOverhangs is + // blank, skip this check. + buildAvailable = false + if len(includeOverhangs) == 0 { + buildAvailable = true + } + for _, includeOverhang := range includeOverhangs { + if includeOverhang == overhangToTest || transform.ReverseComplement(includeOverhang) == overhangToTest { + buildAvailable = true + } + } + if !alreadyExists && buildAvailable { // See if this overhang is a palindrome if !checks.IsPalindromic(overhangToTest) { // Get this overhang set's efficiency - setEfficiency := SetEfficiency(append(existingOverhangs, overhangToTest)) + setEfficiency := SetEfficiency(append(excludeOverhangs, overhangToTest)) // If this overhang is more efficient than any other found so far, set it as the best! 
if setEfficiency > bestOverhangEfficiency { @@ -167,16 +179,24 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment return []string{}, float64(0), fmt.Errorf("bestOverhangPosition failed by equaling zero") } existingFragments = append(existingFragments, sequence[:bestOverhangPosition]) - existingOverhangs = append(existingOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition]) + excludeOverhangs = append(excludeOverhangs, sequence[bestOverhangPosition-4:bestOverhangPosition]) sequence = sequence[bestOverhangPosition-4:] - return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, existingOverhangs) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, existingFragments, excludeOverhangs, includeOverhangs) } // Fragment fragments a sequence into fragments between the min and max size, // choosing fragment ends for optimal assembly efficiency. Since fragments will // be inserted into either a vector or primer binding sites, the first 4 and // last 4 base pairs are the initial overhang set. -func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string) ([]string, float64, error) { +func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string) ([]string, float64, error) { + sequence = strings.ToUpper(sequence) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), []string{}) +} + +// FragmentWithOverhangs fragments a sequence with only a certain overhang set. +// This is useful if you are constraining the set of possible overhangs when +// doing more advanced forms of cloning. +func FragmentWithOverhangs(sequence string, minFragmentSize int, maxFragmentSize int, excludeOverhangs []string, includeOverhangs []string) ([]string, float64, error) { sequence = strings.ToUpper(sequence) - return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...)) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, excludeOverhangs...), includeOverhangs) } diff --git a/synthesis/fragment/fragment_test.go b/synthesis/fragment/fragment_test.go index 0a71bfde9..ab3f0c153 100644 --- a/synthesis/fragment/fragment_test.go +++ b/synthesis/fragment/fragment_test.go @@ -85,3 +85,13 @@ func TestRegressionTestMatching12(t *testing.T) { t.Errorf("Expected efficiency of .99 - approximately matches NEB ligase fidelity viewer of .97. 
Got: %g", efficiency) } } + +func TestFragmentWithOverhangs(t *testing.T) { + defaultOverhangs := []string{"CGAG", "GTCT", "GGGG", "AAAA", "AACT", "AATG", "ATCC", "CGCT", "TTCT", "AAGC", "ATAG", "ATTA", "ATGT", "ACTC", "ACGA", "TATC", "TAGG", "TACA", "TTAC", "TTGA", "TGGA", "GAAG", "GACC", "GCCG", "TCTG", "GTTG", "GTGC", "TGCC", "CTGG", "TAAA", "TGAG", "AAGA", "AGGT", "TTCG", "ACTA", "TTAG", "TCTC", "TCGG", "ATAA", "ATCA", "TTGC", "CACG", "AATA", "ACAA", "ATGG", "TATG", "AAAT", "TCAC"} + gene := "atgaaaaaatttaactggaagaaaatagtcgcgccaattgcaatgctaattattggcttactaggtggtttacttggtgcctttatcctactaacagcagccggggtatcttttaccaatacaacagatactggagtaaaaacggctaagaccgtctacaccaatataacagatacaactaaggctgttaagaaagtacaaaatgccgttgtttctgtcatcaattatcaagaaggttcatcttcagattctctaaatgacctttatggccgtatctttggcggaggggacagttctgattctagccaagaaaattcaaaagattcagatggtctacaggtcgctggtgaaggttctggagtcatctataaaaaagatggcaaagaagcctacatcgtaaccaataaccatgttgtcgatggggctaaaaaacttgaaatcatgctttcggatggttcgaaaattactggtgaacttgttggtaaagacacttactctgacctagcagttgtcaaagtatcttcagataaaataacaactgttgcagaatttgcagactcaaactcccttactgttggtgaaaaagcaattgctatcggtagcccacttggtaccgaatacgccaactcagtaacagaaggaatcgtttctagccttagccgtactataacgatgcaaaacgataatggtgaaactgtatcaacaaacgctatccaaacagatgcagccattaaccctggtaactctggtggtgccctagtcaatattgaaggacaagttatcggtattaattcaagtaaaatttcatcaacgtctgcagtcgctggtagtgctgttgaaggtatggggtttgccattccatcaaacgatgttgttgaaatcatcaatcaattagaaaaagatggtaaagttacacgaccagcactaggaatctcaatagcagatcttaatagcctttctagcagcgcaacttctaaattagatttaccagatgaggtcaaatccggtgttgttgtcggtagtgttcagaaaggtatgccagctgacggtaaacttcaagaatatgatgttatcactgagattgatggtaagaaaatcagctcaaaaactgatattcaaaccaatctttacagccatagtatcggagatactatcaaggtaaccttctatcgtggtaaagataagaaaactgtagatcttaaattaacaaaatctacagaagacatatctgattaa" + + _, _, err := FragmentWithOverhangs(gene, 90, 110, []string{}, defaultOverhangs) + if err != nil { + t.Errorf(err.Error()) + } +}