diff --git a/clone.go b/clone.go new file mode 100644 index 00000000..e3c9bd22 --- /dev/null +++ b/clone.go @@ -0,0 +1,315 @@ +package poly + +import ( + "errors" + "regexp" + "sort" + "strings" + "sync" +) + +/****************************************************************************** +Apr 22, 2021 + +Cloning stuff starts here. + +Since 1973, the most common way to make recombinant DNA has been restriction +enzyme cloning (though lately, homologous recombination based methods like +Gibson assembly have attracted a lot of use). The cloning functions here allow +for simulation of restriction enzyme cloning. + +For a historical review leading up to the discovery: +https://doi.org/10.1073/pnas.1313397110 + +The idea of restriction enzyme cloning is that you can cut DNA at specific +locations with restriction enzyme and then glue them back together in different +patterns using ligase. The final product is (99.9% of the time) a circular plasmid +that you can transform into a bacterial cell for propagation. + +While simulation is simple for simple cases, there are a lot of edge cases to handle, for example: +- Which input sequences are circular? How do we handle their rotations? +- Is the enzyme that is cutting directional? How do we handle that directionality? +- Are there multiple possible outputs of our ligation reaction? For example, ligations may be + to create a "library" of plasmids, in which there are millions of valid combinations. +- How do we handle sequences that get ligated in multiple orientations? + +These cloning functions handle all those problems so that they appear simple to the end user. + +In particular, there is a focus here on GoldenGate Assembly: +https://en.wikipedia.org/wiki/Golden_Gate_Cloning +https://www.neb.com/applications/cloning-and-synthetic-biology/dna-assembly-and-cloning/golden-gate-assembly + +GoldenGate is a particular kind of restriction enzyme cloning reaction that you can do +in a single tube and that is extraordinarily efficient (up to 50 parts) and is popular +for new modular DNA part toolkits. Users can easily simulate GoldenGate assembly reactions +with just their input fragments + the enzyme name. + +Let's build some DNA! + +Keoni + +PS: We do NOT (yet) handle restriction enzymes which recognize one site but cut +in multiple places (Type IIG enzymes) such as BcgI. + +******************************************************************************/ + +// CloneSequence is a simple struct that can carry a circular or linear DNA sequence. +type CloneSequence struct { + Sequence string + Circular bool +} + +// Overhang is a struct that represents the ends of a linearized sequence where Enzymes had cut. +type Overhang struct { + Length int + Position int + Forward bool +} + +// Fragment is a struct that represents linear DNA sequences with sticky ends. +type Fragment struct { + Sequence string + ForwardOverhang string + ReverseOverhang string +} + +// Enzyme is a struct that represents restriction enzymes. +type Enzyme struct { + Name string + RegexpFor *regexp.Regexp + RegexpRev *regexp.Regexp + Skip int + OverhangLen int + RecognitionSite string +} + +/****************************************************************************** + +Base cloning functions begin here. + +******************************************************************************/ + +// https://qvault.io/2019/10/21/golang-constant-maps-slices/ +func getBaseRestrictionEnzymes() map[string]Enzyme { + // Eventually, we want to get the data for this map from ftp://ftp.neb.com/pub/rebase + enzymeMap := make(map[string]Enzyme) + + // Build default enzymes + enzymeMap["BsaI"] = Enzyme{"BsaI", regexp.MustCompile("GGTCTC"), regexp.MustCompile("GAGACC"), 1, 4, "GGTCTC"} + enzymeMap["BbsI"] = Enzyme{"BbsI", regexp.MustCompile("GAAGAC"), regexp.MustCompile("GTCTTC"), 2, 4, "GAAGAC"} + enzymeMap["BtgZI"] = Enzyme{"BtgZI", regexp.MustCompile("GCGATG"), regexp.MustCompile("CATCGC"), 10, 4, "GCGATG"} + + // Return EnzymeMap + return enzymeMap +} + +// CutWithEnzymeByName cuts a given sequence with an enzyme represented by the +// enzyme's name. It is a convenience wrapper around CutWithEnzyme that +// allows us to specify the enzyme by name. +func CutWithEnzymeByName(seq CloneSequence, directional bool, enzymeStr string) ([]Fragment, error) { + enzymeMap := getBaseRestrictionEnzymes() + if _, ok := enzymeMap[enzymeStr]; !ok { + return []Fragment{}, errors.New("Enzyme " + enzymeStr + " not found in enzymeMap") + } + enzyme := enzymeMap[enzymeStr] + return CutWithEnzyme(seq, directional, enzyme), nil +} + +// CutWithEnzyme cuts a given sequence with an enzyme represented by an Enzyme struct. +func CutWithEnzyme(seq CloneSequence, directional bool, enzyme Enzyme) []Fragment { + var fragmentSeqs []string + var sequence string + if seq.Circular { + sequence = strings.ToUpper(seq.Sequence + seq.Sequence) + } else { + sequence = strings.ToUpper(seq.Sequence) + } + + // Check for palindromes + palindromic := IsPalindromic(enzyme.RecognitionSite) + + // Find and define overhangs + var overhangs []Overhang + var forwardOverhangs []Overhang + var reverseOverhangs []Overhang + forwardCuts := enzyme.RegexpFor.FindAllStringIndex(sequence, -1) + for _, forwardCut := range forwardCuts { + forwardOverhangs = append(forwardOverhangs, Overhang{Length: enzyme.OverhangLen, Position: forwardCut[1] + enzyme.Skip, Forward: true}) + } + // Palindromic enzymes won't need reverseCuts + if !palindromic { + reverseCuts := enzyme.RegexpRev.FindAllStringIndex(sequence, -1) + for _, reverseCut := range reverseCuts { + reverseOverhangs = append(reverseOverhangs, Overhang{Length: enzyme.OverhangLen, Position: reverseCut[0] - enzyme.Skip, Forward: false}) + } + } + + // If, on a linear sequence, the last overhang's position + EnzymeSkip + EnzymeOverhangLen is over the length of the sequence, remove that overhang. + for _, overhangSet := range [][]Overhang{forwardOverhangs, reverseOverhangs} { + if len(overhangSet) > 0 { + if !seq.Circular && (overhangSet[len(overhangSet)-1].Position+enzyme.Skip+enzyme.OverhangLen > len(sequence)) { + overhangSet = overhangSet[:len(overhangSet)-1] + } + } + overhangs = append(overhangs, overhangSet...) + } + + // Sort overhangs + sort.SliceStable(overhangs, func(i, j int) bool { + return overhangs[i].Position < overhangs[j].Position + }) + + // Convert Overhangs into Fragments + var fragments []Fragment + var currentOverhang Overhang + var nextOverhang Overhang + // Linear fragments with 1 cut that are no directional will always give a + // 2 fragments + if len(overhangs) == 1 && !directional && !seq.Circular { // Check the case of a single cut + // In the case of a single cut in a linear sequence, we get two fragments with only 1 stick end + fragmentSeq1 := sequence[overhangs[0].Position+overhangs[0].Length:] + fragmentSeq2 := sequence[:overhangs[0].Position] + overhangSeq := sequence[overhangs[0].Position : overhangs[0].Position+overhangs[0].Length] + fragments = append(fragments, Fragment{fragmentSeq1, overhangSeq, ""}) + fragments = append(fragments, Fragment{fragmentSeq2, "", overhangSeq}) + return fragments + } + + // Circular fragments with 1 cut will always have 2 overhangs (because of the + // concat earlier). If we don't require directionality, this will always get + // cut into a single fragment + if len(overhangs) == 2 && !directional && seq.Circular { + // In the case of a single cut in a circular sequence, we get one fragment out with sticky overhangs + fragmentSeq1 := sequence[overhangs[0].Position+overhangs[0].Length : len(seq.Sequence)] + fragmentSeq2 := sequence[:overhangs[0].Position] + fragmentSeq := fragmentSeq1 + fragmentSeq2 + overhangSeq := sequence[overhangs[0].Position : overhangs[0].Position+overhangs[0].Length] + fragments = append(fragments, Fragment{fragmentSeq, overhangSeq, overhangSeq}) + return fragments + } + + if len(overhangs) > 1 { + // The following will iterate over the overhangs list to turn them into fragments + // There are two important variables: if the sequence is circular, and if the enzyme cutting is directional. All Type IIS enzymes + // are directional, and in normal GoldenGate reactions these fragments would be constantly cut with enzyme as the reaction runs, + // so are removed from the output sequences. If the enzyme is not directional, all fragments are valid. + // If the sequence is circular, there is a chance that the nextOverhang's position will be greater than the length of the original sequence. + // This is ok, and represents a valid cut/fragmentation of a rotation of the sequence. However, everything after will be a repeat fragment + // of current fragments, so the iteration is terminated. + for overhangIndex := 0; overhangIndex < len(overhangs)-1; overhangIndex++ { + currentOverhang = overhangs[overhangIndex] + nextOverhang = overhangs[overhangIndex+1] + // If we want directional cutting and the enzyme is not palindromic, we + // can remove fragments that are continuously cut by the enzyme. This is + // the basis of GoldenGate assembly. + if directional && !palindromic { + if currentOverhang.Forward && !nextOverhang.Forward { + fragmentSeqs = append(fragmentSeqs, sequence[currentOverhang.Position:nextOverhang.Position]) + } + if nextOverhang.Position > len(seq.Sequence) { + break + } + } else { + fragmentSeqs = append(fragmentSeqs, sequence[currentOverhang.Position:nextOverhang.Position]) + if nextOverhang.Position > len(seq.Sequence) { + break + } + } + } + // Convert fragment sequences into fragments + for _, fragment := range fragmentSeqs { + fragments = append(fragments, Fragment{Sequence: fragment[enzyme.OverhangLen : len(fragment)-enzyme.OverhangLen], ForwardOverhang: fragment[:enzyme.OverhangLen], ReverseOverhang: fragment[len(fragment)-enzyme.OverhangLen:]}) + } + } + + return fragments +} + +func recurseLigate(wg *sync.WaitGroup, c chan string, seedFragment Fragment, fragmentList []Fragment) { + // Recurse ligate simulates all possible ligations of a series of fragments. Each possible combination begins with a "seed" that fragments from the pool can be added to. + defer wg.Done() + // If the seed ligates to itself, we can call it done with a successful circularization! + if seedFragment.ForwardOverhang == seedFragment.ReverseOverhang { + c <- seedFragment.ForwardOverhang + seedFragment.Sequence + } else { + for _, newFragment := range fragmentList { + // If the seedFragment's reverse overhang is ligates to a fragment's forward overhang, we can ligate those together and seed another ligation reaction + if seedFragment.ReverseOverhang == newFragment.ForwardOverhang { + newSeed := Fragment{seedFragment.Sequence + seedFragment.ReverseOverhang + newFragment.Sequence, seedFragment.ForwardOverhang, newFragment.ReverseOverhang} + wg.Add(1) + go recurseLigate(wg, c, newSeed, fragmentList) + } + // This checks if we can ligate the next fragment in its reverse direction. We have to be careful though - if our seed has a palindrome, it will ligate to itself + // like [-> <- -> <- -> ...] infinitely. We check for that case here as well. + if (seedFragment.ReverseOverhang == ReverseComplement(newFragment.ReverseOverhang)) && (seedFragment.ReverseOverhang != ReverseComplement(seedFragment.ReverseOverhang)) { // If the second statement isn't there, program will crash on palindromes + newSeed := Fragment{seedFragment.Sequence + seedFragment.ReverseOverhang + ReverseComplement(newFragment.Sequence), seedFragment.ForwardOverhang, ReverseComplement(newFragment.ForwardOverhang)} + wg.Add(1) + go recurseLigate(wg, c, newSeed, fragmentList) + } + } + } +} + +func getConstructs(c chan string, constructSequences chan []CloneSequence) { + var constructs []CloneSequence + var exists bool + var existingSeqhashes []string + for { + construct, more := <-c + if more { + exists = false + seqhashConstruct, _ := Hash(construct, "DNA", true, true) + // Check if this construct is unique + for _, existingSeqhash := range existingSeqhashes { + if existingSeqhash == seqhashConstruct { + exists = true + } + } + if !exists { + constructs = append(constructs, CloneSequence{construct, true}) + existingSeqhashes = append(existingSeqhashes, seqhashConstruct) + } + } else { + constructSequences <- constructs + close(constructSequences) + return + } + } +} + +// CircularLigate simulates ligation of all possible fragment combinations into circular plasmids. +func CircularLigate(fragments []Fragment) []CloneSequence { + var wg sync.WaitGroup + var constructs []CloneSequence + c := make(chan string) //, maxClones) // A buffered channel is needed to prevent blocking. + constructSequences := make(chan []CloneSequence) + for _, fragment := range fragments { + wg.Add(1) + go recurseLigate(&wg, c, fragment, fragments) + } + go getConstructs(c, constructSequences) + wg.Wait() + close(c) + constructs = <-constructSequences + return constructs +} + +/****************************************************************************** + +Specific cloning functions begin here. + +******************************************************************************/ + +// GoldenGate simulates a GoldenGate cloning reaction. As of right now, we only +// support BsaI, BbsI, BtgZI, and BsmBI. +func GoldenGate(sequences []CloneSequence, enzymeStr string) ([]CloneSequence, error) { + var fragments []Fragment + for _, sequence := range sequences { + newFragments, err := CutWithEnzymeByName(sequence, true, enzymeStr) + if err != nil { + return []CloneSequence{}, err + } + fragments = append(fragments, newFragments...) + } + return CircularLigate(fragments), nil +} diff --git a/clone_test.go b/clone_test.go new file mode 100644 index 00000000..23a1b303 --- /dev/null +++ b/clone_test.go @@ -0,0 +1,137 @@ +package poly + +import ( + "fmt" + "log" + "testing" +) + +// pOpen plasmid series (https://stanford.freegenes.org/collections/open-genes/products/open-plasmids#description). I use it for essentially all my cloning. -Keoni +var popen = CloneSequence{"TAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGTAgtcttcGCcatcgCtACTAAAagccagataacagtatgcgtatttgcgcgctgatttttgcggtataagaatatatactgatatgtatacccgaagtatgtcaaaaagaggtatgctatgaagcagcgtattacagtgacagttgacagcgacagctatcagttgctcaaggcatatatgatgtcaatatctccggtctggtaagcacaaccatgcagaatgaagcccgtcgtctgcgtgccgaacgctggaaagcggaaaatcaggaagggatggctgaggtcgcccggtttattgaaatgaacggctcttttgctgacgagaacagggGCTGGTGAAATGCAGTTTAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACGCCCGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGCTGTCAGATAAAGTCTCCCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGGCCAGTGTGCCGGTCTCCGTTATCGGGGAAGAAGTGGCTGATCTCAGCCACCGCGAAAATGACATCAAAAACGCCATTAACCTGATGTTCTGGGGAATATAAATGTCAGGCTCCCTTATACACAGgcgatgttgaagaccaCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGG", true} + +func TestCutWithEnzymeByName(t *testing.T) { + _, err := CutWithEnzymeByName(popen, true, "EcoFake") + if err == nil { + log.Fatalf("CutWithEnzymeByName should have failed when looking for fake restriction enzyme EcoFake") + } +} + +func TestCutWithEnzyme(t *testing.T) { + var seq CloneSequence + bsai := "GGTCTCAATGC" + bsaiComplement := "ATGCAGAGACC" + + // test(1) + // Test case of `<-bsaiComplement bsai-> <-bsaiComplement bsai->` where bsaI cuts off of a linear sequence. This tests the line: + // if !seq.Circular && (overhangSet[len(overhangSet)-1].Position+enzyme.EnzymeSkip+enzyme.EnzymeOverhangLen > len(sequence)) + seq = CloneSequence{"ATATATA" + bsaiComplement + bsai + "ATGCATCGATCGACTAGCATG" + bsaiComplement + bsai[:8], false} + frag, err := CutWithEnzymeByName(seq, true, "BsaI") + if err != nil { + log.Fatalf("CutWithEnzyme should not have failed on test(1). Got error: %s", err) + } + if len(frag) != 1 { + log.Fatalf("CutWithEnzyme in test(1) should be 1 fragment in length") + } + if frag[0].Sequence != "ATGCATCGATCGACTAGCATG" { + log.Fatalf("CutWithEnzyme in test(1) should give fragment with sequence ATGCATCGATCGACTAGCATG . Got sequence: %s", frag[0].Sequence) + } + + // test(2) + // Now if we take the same sequence and circularize it, we get a different result + seq.Circular = true + frag, err = CutWithEnzymeByName(seq, true, "BsaI") + if err != nil { + log.Fatalf("CutWithEnzyme should not have failed on test(2). Got error: %s", err) + } + if len(frag) != 2 { + log.Fatalf("CutWithEnzyme in test(2) should be 1 fragment in length") + } + if frag[0].Sequence != "ATGCATCGATCGACTAGCATG" || frag[1].Sequence != "TATA" { + log.Fatalf("CutWithEnzyme in test(2) should give fragment with sequence ATGCATCGATCGACTAGCATG and TATA. Got sequence: %s and %s", frag[0].Sequence, frag[1].Sequence) + } + + // test(3) + // Let's test if we have a single cut in our plasmid. This should give + // different results if we have a linear or circular DNA. Since single cuts + // will give no fragments if you test for directionality, we set the + // directionality flag to false. This tests the line: + // if len(overhangs) == 1 && !directional && !seq.Circular + seq = CloneSequence{"ATATATATATATATAT" + bsai + "GCGCGCGCGCGCGCGCGCGC", false} + frag, err = CutWithEnzymeByName(seq, false, "BsaI") + if err != nil { + log.Fatalf("CutWithEnzyme should not have failed on test(3). Got error: %s", err) + } + if len(frag) != 2 { + log.Fatalf("Cutting a linear fragment with a single cut site should give 2 fragments") + } + if frag[0].Sequence != "GCGCGCGCGCGCGCGCGCGC" || frag[1].Sequence != "ATATATATATATATATGGTCTCA" { + log.Fatalf("CutWithEnzyme in test(3) should give fragment with sequence GCGCGCGCGCGCGCGCGCGC and ATATATATATATATATGGTCTCA. Got sequence: %s and %s", frag[0].Sequence, frag[1].Sequence) + } + + // test(4) + // This tests for the above except with a circular fragment. Specifically, it + // tests the line: + // if len(overhangs) == 2 && !directional && seq.Circular + seq.Circular = true + frag, err = CutWithEnzymeByName(seq, false, "BsaI") + if err != nil { + log.Fatalf("CutWithEnzyme should not have failed on test(4). Got error: %s", err) + } + if len(frag) != 1 { + log.Fatalf("Cutting a circular fragment with a single cut site should give 1 fragments") + } + if frag[0].Sequence != "GCGCGCGCGCGCGCGCGCGCATATATATATATATATGGTCTCA" { + log.Fatalf("CutWithEnzyme in test(4) should give fragment with sequence ATATATATATATATATGGTCTCA. Got Sequence: %s", frag[0].Sequence) + } + + // test(5) + // This tests if we have a fragment where we do not care about directionality + // but have more than 1 cut site in our fragment. We can use pOpen for this. + frag, err = CutWithEnzymeByName(popen, false, "BbsI") + if err != nil { + log.Fatalf("CutWithEnzyme should not have failed on test(5). Got error: %s", err) + } + if len(frag) != 2 { + log.Fatalf("Cutting pOpen without a direction should yield 2 fragments") + } +} + +func TestCircularLigate(t *testing.T) { + // The following tests for complementing overhangs. Specific, this line: + // newSeed := Fragment{seedFragment.Sequence + seedFragment.ReverseOverhang + ReverseComplement(newFragment.Sequence), seedFragment.ForwardOverhang, ReverseComplement(newFragment.ForwardOverhang)} + fragment1 := Fragment{"AAAAAA", "GTTG", "CTAT"} + fragment2 := Fragment{"AAAAAA", "CAAC", "ATAG"} + outputConstructs := CircularLigate([]Fragment{fragment1, fragment2}) + if len(outputConstructs) != 1 { + fmt.Println(outputConstructs) + log.Fatalf("Circular ligation with complementing overhangs should only output 1 valid rotated sequence.") + } +} + +func TestGoldenGate(t *testing.T) { + // Here we test if the enzyme we want to use in a GoldenGate reaction does not exist in our enzyme pool + fragment1 := CloneSequence{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} + fragment2 := CloneSequence{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} + + _, err := GoldenGate([]CloneSequence{fragment1, fragment2, popen}, "EcoRFake") + if err == nil { + log.Fatalf("GoldenGate should fail when using enzyme EcoRFake") + } + if err.Error() != "Enzyme EcoRFake not found in enzymeMap" { + log.Fatalf("Failure of GoldenGate on incorrect enzyme should follow the exact string `Enzyme EcoRFake not found in enzymeMap`. Got: %s", err.Error()) + } +} + +func ExampleGoldenGate() { + // Fragment 1 has a palindrome at its start. This isn't very common but + // can occur. These two fragments are real DNA fragments used in the + // FreeGenes Project. They are used because they were on my computer + // - Keoni + fragment1 := CloneSequence{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} + fragment2 := CloneSequence{"GAAGTGCCATTCCGCCTGACCTGAAGACCAGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGCGTCTTCAGGCTAGGTGGAGGCTCAGTG", false} + + Clones, _ := GoldenGate([]CloneSequence{fragment1, fragment2, popen}, "BbsI") + + fmt.Println(RotateSequence(Clones[0].Sequence)) + // Output: AAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGAAACACGTGGCAAACATTCCGGTCTCAAATGGAAAAGAGCAACGAAACCAACGGCTACCTTGACAGCGCTCAAGCCGGCCCTGCAGCTGGCCCGGGCGCTCCGGGTACCGCCGCGGGTCGTGCACGTCGTTGCGCGGGCTTCCTGCGGCGCCAAGCGCTGGTGCTGCTCACGGTGTCTGGTGTTCTGGCAGGCGCCGGTTTGGGCGCGGCACTGCGTGGGCTCAGCCTGAGCCGCACCCAGGTCACCTACCTGGCCTTCCCCGGCGAGATGCTGCTCCGCATGCTGCGCATGATCATCCTGCCGCTGGTGGTCTGCAGCCTGGTGTCGGGCGCCGCCTCCCTCGATGCCAGCTGCCTCGGGCGTCTGGGCGGTATCGCTGTCGCCTACTTTGGCCTCACCACACTGAGTGCCTCGGCGCTCGCCGTGGCCTTGGCGTTCATCATCAAGCCAGGATCCGGTGCGCAGACCCTTCAGTCCAGCGACCTGGGGCTGGAGGACTCGGGGCCTCCTCCTGTCCCCAAAGAAACGGTGGACTCTTTCCTCGACCTGGCCAGAAACCTGTTTCCCTCCAATCTTGTGGTTGCAGCTTTCCGTACGTATGCAACCGATTATAAAGTCGTGACCCAGAACAGCAGCTCTGGAAATGTAACCCATGAAAAGATCCCCATAGGCACTGAGATAGAAGGGATGAACATTTTAGGATTGGTCCTGTTTGCTCTGGTGTTAGGAGTGGCCTTAAAGAAACTAGGCTCCGAAGGAGAGGACCTCATCCGTTTCTTCAATTCCCTCAACGAGGCGACGATGGTGCTGGTGTCCTGGATTATGTGGTACGTACCTGTGGGCATCATGTTCCTTGTTGGAAGCAAGATCGTGGAAATGAAAGACATCATCGTGCTGGTGACCAGCCTGGGGAAATACATCTTCGCATCTATATTGGGCCACGTCATTCATGGTGGTATCGTCCTGCCGCTGATTTATTTTGTTTTCACACGAAAAAACCCATTCAGATTCCTCCTGGGCCTCCTCGCCCCATTTGCGACAGCATTTGCTACGTGCTCCAGCTCAGCGACCCTTCCCTCTATGATGAAGTGCATTGAAGAGAACAATGGTGTGGACAAGAGGATCTCCAGGTTTATTCTCCCCATCGGGGCCACCGTGAACATGGACGGAGCAGCCATCTTCCAGTGTGTGGCCGCGGTGTTCATTGCGCAACTCAACAACGTAGAGCTCAACGCAGGACAGATTTTCACCATTCTAGTGACTGCCACAGCGTCCAGTGTTGGAGCAGCAGGCGTGCCAGCTGGAGGGGTCCTCACCATTGCCATTATCCTGGAGGCCATTGGGCTGCCTACTCATGATCTGCCTCTGATCCTGGCTGTGGACTGGATTGTGGACCGGACCACCACGGTGGTGAATGTGGAAGGGGATGCCCTGGGTGCAGGCATTCTCCACCACCTGAATCAGAAGGCAACAAAGAAAGGCGAGCAGGAACTTGCTGAGGTGAAAGTGGAAGCCATCCCCAACTGCAAGTCTGAGGAGGAAACCTCGCCCCTGGTGACACACCAGAACCCCGCTGGCCCCGTGGCCAGTGCCCCAGAACTGGAATCCAAGGAGTCGGTTCTGTGAAGAGCTTAGAGACCGACGACTGCCTAAGGACATTCGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAG +} diff --git a/sequence.go b/sequence.go index 7fd24a8d..9bd9ccd4 100644 --- a/sequence.go +++ b/sequence.go @@ -70,6 +70,12 @@ func ComplementBase(basePair rune) rune { return complementBaseRuneMap[basePair] } +// IsPalindromic accepts a sequence of even length and returns if it is +// palindromic. More here - https://en.wikipedia.org/wiki/Palindromic_sequence +func IsPalindromic(sequence string) bool { + return sequence == ReverseComplement(sequence) +} + //RandomProteinSequence returns a random protein sequence as a string that have size length, starts with aminoacid M (Methionine) and finishes with * (stop codon). The random generator uses the seed provided as parameter. func RandomProteinSequence(length int, seed int64) (string, error) { //The length needs to be greater than two because the random protein sequenced returned always contain a start and stop codon. You could see more about this stuff here: https://en.wikipedia.org/wiki/Genetic_code#Start_and_stop_codons diff --git a/sequence_test.go b/sequence_test.go index 446fa877..71ccd9e9 100644 --- a/sequence_test.go +++ b/sequence_test.go @@ -139,3 +139,14 @@ func TestRandomProteinSequenceError(t *testing.T) { t.Errorf("Random sequence must have sequence size equals 0 'RandomSequence(2, 4)'. Got this: \n%s instead of \n%s", strconv.Itoa(len(sequence)), strconv.Itoa(length)) } } + +func TestIsPalindromic(t *testing.T) { + ecori := IsPalindromic("GAATTC") + if ecori != true { + t.Errorf("IsPalindromic failed to call EcoRI a palindrome") + } + bsai := IsPalindromic("GGTCTC") + if bsai != false { + t.Errorf("IsPalindromic failed call BsaI NOT a palindrome") + } +}