-
-
Notifications
You must be signed in to change notification settings - Fork 73
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cloning refactor and data race fix #393
Changes from 10 commits
ea89331
3700cbb
db1f25a
2933f12
7a733c7
3941e24
d8c803a
bda46d6
a241f1d
99ed49c
b7b6f02
de9226d
69819b3
9e8f947
7fe5871
3f8932a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,7 +46,6 @@ import ( | |
"regexp" | ||
"sort" | ||
"strings" | ||
"sync" | ||
|
||
"github.com/TimothyStiles/poly/checks" | ||
"github.com/TimothyStiles/poly/seqhash" | ||
|
@@ -83,15 +82,27 @@ type Enzyme struct { | |
RegexpFor *regexp.Regexp | ||
RegexpRev *regexp.Regexp | ||
Skip int | ||
OverhangLen int | ||
OverheadLength int | ||
RecognitionSite string | ||
} | ||
|
||
// Eventually, we want to get the data for this map from ftp://ftp.neb.com/pub/rebase | ||
var enzymeMap = map[string]Enzyme{ | ||
"BsaI": {"BsaI", regexp.MustCompile("GGTCTC"), regexp.MustCompile("GAGACC"), 1, 4, "GGTCTC"}, | ||
"BbsI": {"BbsI", regexp.MustCompile("GAAGAC"), regexp.MustCompile("GTCTTC"), 2, 4, "GAAGAC"}, | ||
"BtgZI": {"BtgZI", regexp.MustCompile("GCGATG"), regexp.MustCompile("CATCGC"), 10, 4, "GCGATG"}, | ||
// EnzymeManager manager for Enzymes. Allows for management of enzymes throughout the lifecyle of your | ||
// program. EnzymeManager is not safe for concurrent use. | ||
type EnzymeManager struct { | ||
// enzymeMap Map of enzymes that exist for the lifetime of the manager. Not safe for concurrent use. | ||
enzymeMap map[string]Enzyme | ||
} | ||
|
||
// NewEnzymeManager creates a new EnzymeManager given some enzymes. | ||
func NewEnzymeManager(enzymes []Enzyme) EnzymeManager { | ||
enzymeMap := make(map[string]Enzyme) | ||
for enzymeIndex := range enzymes { | ||
enzymeMap[enzymes[enzymeIndex].Name] = enzymes[enzymeIndex] | ||
} | ||
|
||
return EnzymeManager{ | ||
enzymeMap: enzymeMap, | ||
} | ||
} | ||
|
||
/****************************************************************************** | ||
|
@@ -100,30 +111,27 @@ Base cloning functions begin here. | |
|
||
******************************************************************************/ | ||
|
||
func getBaseRestrictionEnzymes() map[string]Enzyme { | ||
return enzymeMap | ||
} | ||
|
||
// CutWithEnzymeByName cuts a given sequence with an enzyme represented by the | ||
// enzyme's name. It is a convenience wrapper around CutWithEnzyme that | ||
// allows us to specify the enzyme by name. | ||
func CutWithEnzymeByName(seq Part, directional bool, enzymeStr string) ([]Fragment, error) { | ||
enzymeMap := getBaseRestrictionEnzymes() | ||
if _, ok := enzymeMap[enzymeStr]; !ok { | ||
return []Fragment{}, errors.New("Enzyme " + enzymeStr + " not found in enzymeMap") | ||
func (enzymeManager EnzymeManager) CutWithEnzymeByName(part Part, directional bool, name string) ([]Fragment, error) { | ||
// Get the enzyme from the enzyme map | ||
if enzyme, ok := enzymeManager.enzymeMap[name]; ok { | ||
// Cut the sequence with the enzyme | ||
return CutWithEnzyme(part, directional, enzyme), nil | ||
} | ||
enzyme := enzymeMap[enzymeStr] | ||
return CutWithEnzyme(seq, directional, enzyme), nil | ||
// Return an error if the enzyme is not found | ||
return []Fragment{}, errors.New("Enzyme " + name + " not found") | ||
} | ||
|
||
// CutWithEnzyme cuts a given sequence with an enzyme represented by an Enzyme struct. | ||
func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment { | ||
func CutWithEnzyme(part Part, directional bool, enzyme Enzyme) []Fragment { | ||
var fragmentSeqs []string | ||
var sequence string | ||
if seq.Circular { | ||
sequence = strings.ToUpper(seq.Sequence + seq.Sequence) | ||
if part.Circular { | ||
sequence = strings.ToUpper(part.Sequence + part.Sequence) | ||
} else { | ||
sequence = strings.ToUpper(seq.Sequence) | ||
sequence = strings.ToUpper(part.Sequence) | ||
} | ||
|
||
// Check for palindromes | ||
|
@@ -135,20 +143,20 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment { | |
var reverseOverhangs []Overhang | ||
forwardCuts := enzyme.RegexpFor.FindAllStringIndex(sequence, -1) | ||
for _, forwardCut := range forwardCuts { | ||
forwardOverhangs = append(forwardOverhangs, Overhang{Length: enzyme.OverhangLen, Position: forwardCut[1] + enzyme.Skip, Forward: true, RecognitionSitePlusSkipLength: len(enzyme.RecognitionSite) + enzyme.Skip}) | ||
forwardOverhangs = append(forwardOverhangs, Overhang{Length: enzyme.OverheadLength, Position: forwardCut[1] + enzyme.Skip, Forward: true, RecognitionSitePlusSkipLength: len(enzyme.RecognitionSite) + enzyme.Skip}) | ||
} | ||
// Palindromic enzymes won't need reverseCuts | ||
if !palindromic { | ||
reverseCuts := enzyme.RegexpRev.FindAllStringIndex(sequence, -1) | ||
for _, reverseCut := range reverseCuts { | ||
reverseOverhangs = append(reverseOverhangs, Overhang{Length: enzyme.OverhangLen, Position: reverseCut[0] - enzyme.Skip, Forward: false, RecognitionSitePlusSkipLength: len(enzyme.RecognitionSite) + enzyme.Skip}) | ||
reverseOverhangs = append(reverseOverhangs, Overhang{Length: enzyme.OverheadLength, Position: reverseCut[0] - enzyme.Skip, Forward: false, RecognitionSitePlusSkipLength: len(enzyme.RecognitionSite) + enzyme.Skip}) | ||
} | ||
} | ||
|
||
// If, on a linear sequence, the last overhang's position + EnzymeSkip + EnzymeOverhangLen is over the length of the sequence, remove that overhang. | ||
for _, overhangSet := range [][]Overhang{forwardOverhangs, reverseOverhangs} { | ||
if len(overhangSet) > 0 { | ||
if !seq.Circular && (overhangSet[len(overhangSet)-1].Position+enzyme.Skip+enzyme.OverhangLen > len(sequence)) { | ||
if !part.Circular && (overhangSet[len(overhangSet)-1].Position+enzyme.Skip+enzyme.OverheadLength > len(sequence)) { | ||
overhangSet = overhangSet[:len(overhangSet)-1] | ||
} | ||
} | ||
|
@@ -166,7 +174,7 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment { | |
var nextOverhang Overhang | ||
// Linear fragments with 1 cut that are no directional will always give a | ||
// 2 fragments | ||
if len(overhangs) == 1 && !directional && !seq.Circular { // Check the case of a single cut | ||
if len(overhangs) == 1 && !directional && !part.Circular { // Check the case of a single cut | ||
// In the case of a single cut in a linear sequence, we get two fragments with only 1 stick end | ||
fragmentSeq1 := sequence[overhangs[0].Position+overhangs[0].Length:] | ||
fragmentSeq2 := sequence[:overhangs[0].Position] | ||
|
@@ -179,9 +187,9 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment { | |
// Circular fragments with 1 cut will always have 2 overhangs (because of the | ||
// concat earlier). If we don't require directionality, this will always get | ||
// cut into a single fragment | ||
if len(overhangs) == 2 && !directional && seq.Circular { | ||
if len(overhangs) == 2 && !directional && part.Circular { | ||
// In the case of a single cut in a circular sequence, we get one fragment out with sticky overhangs | ||
fragmentSeq1 := sequence[overhangs[0].Position+overhangs[0].Length : len(seq.Sequence)] | ||
fragmentSeq1 := sequence[overhangs[0].Position+overhangs[0].Length : len(part.Sequence)] | ||
fragmentSeq2 := sequence[:overhangs[0].Position] | ||
fragmentSeq := fragmentSeq1 + fragmentSeq2 | ||
overhangSeq := sequence[overhangs[0].Position : overhangs[0].Position+overhangs[0].Length] | ||
|
@@ -209,12 +217,12 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment { | |
} | ||
// We have to subtract RecognitionSitePlusSkipLength in case we have a recognition site on | ||
// one side of the origin of a circular sequence and the cut site on the other side of the origin | ||
if nextOverhang.Position-nextOverhang.RecognitionSitePlusSkipLength > len(seq.Sequence) { | ||
if nextOverhang.Position-nextOverhang.RecognitionSitePlusSkipLength > len(part.Sequence) { | ||
break | ||
} | ||
} else { | ||
fragmentSeqs = append(fragmentSeqs, sequence[currentOverhang.Position:nextOverhang.Position]) | ||
if nextOverhang.Position-nextOverhang.RecognitionSitePlusSkipLength > len(seq.Sequence) { | ||
if nextOverhang.Position-nextOverhang.RecognitionSitePlusSkipLength > len(part.Sequence) { | ||
break | ||
} | ||
} | ||
|
@@ -224,9 +232,9 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment { | |
// Minimum lengths (given oligos) for assembly is 8 base pairs | ||
// https://doi.org/10.1186/1756-0500-3-291 | ||
if len(fragment) > 8 { | ||
fragmentSequence := fragment[enzyme.OverhangLen : len(fragment)-enzyme.OverhangLen] | ||
forwardOverhang := fragment[:enzyme.OverhangLen] | ||
reverseOverhang := fragment[len(fragment)-enzyme.OverhangLen:] | ||
fragmentSequence := fragment[enzyme.OverheadLength : len(fragment)-enzyme.OverheadLength] | ||
forwardOverhang := fragment[:enzyme.OverheadLength] | ||
reverseOverhang := fragment[len(fragment)-enzyme.OverheadLength:] | ||
fragments = append(fragments, Fragment{Sequence: fragmentSequence, ForwardOverhang: forwardOverhang, ReverseOverhang: reverseOverhang}) | ||
} | ||
} | ||
|
@@ -235,12 +243,17 @@ func CutWithEnzyme(seq Part, directional bool, enzyme Enzyme) []Fragment { | |
return fragments | ||
} | ||
|
||
func recurseLigate(wg *sync.WaitGroup, constructs chan string, infiniteLoopingConstructs chan string, seedFragment Fragment, fragmentList []Fragment, usedFragments []Fragment) { | ||
func recurseLigate(seedFragment Fragment, fragmentList []Fragment, usedFragments []Fragment, existingSeqhashes map[string]struct{}) (openConstructs []string, infiniteConstructs []string) { | ||
// Recurse ligate simulates all possible ligations of a series of fragments. Each possible combination begins with a "seed" that fragments from the pool can be added to. | ||
defer wg.Done() | ||
// If the seed ligates to itself, we can call it done with a successful circularization! | ||
if seedFragment.ForwardOverhang == seedFragment.ReverseOverhang { | ||
constructs <- seedFragment.ForwardOverhang + seedFragment.Sequence | ||
construct := seedFragment.ForwardOverhang + seedFragment.Sequence | ||
seqhash, _ := seqhash.Hash(construct, "DNA", true, true) | ||
if _, ok := existingSeqhashes[seqhash]; ok { | ||
return nil, nil | ||
} | ||
existingSeqhashes[seqhash] = struct{}{} | ||
return []string{construct}, nil | ||
} else { | ||
for _, newFragment := range fragmentList { | ||
// If the seedFragment's reverse overhang is ligates to a fragment's forward overhang, we can ligate those together and seed another ligation reaction | ||
|
@@ -262,67 +275,40 @@ func recurseLigate(wg *sync.WaitGroup, constructs chan string, infiniteLoopingCo | |
// If the newFragment's reverse complement already exists in the used fragment list, we need to cancel the recursion. | ||
for _, usedFragment := range usedFragments { | ||
if usedFragment.Sequence == newFragment.Sequence { | ||
infiniteLoopingConstructs <- usedFragment.ForwardOverhang + usedFragment.Sequence + usedFragment.ReverseOverhang | ||
return | ||
infiniteConstruct := usedFragment.ForwardOverhang + usedFragment.Sequence + usedFragment.ReverseOverhang | ||
seqhash, _ := seqhash.Hash(infiniteConstruct, "DNA", false, true) | ||
if _, ok := existingSeqhashes[seqhash]; ok { | ||
return nil, nil | ||
} | ||
existingSeqhashes[seqhash] = struct{}{} | ||
return nil, []string{infiniteConstruct} | ||
} | ||
} | ||
wg.Add(1) | ||
// If everything is clear, append fragment to usedFragments and recurse. | ||
usedFragments = append(usedFragments, newFragment) | ||
go recurseLigate(wg, constructs, infiniteLoopingConstructs, newSeed, fragmentList, usedFragments) | ||
} | ||
} | ||
} | ||
} | ||
oc, ic := recurseLigate(newSeed, fragmentList, usedFragments, existingSeqhashes) | ||
|
||
func getConstructs(c chan string, constructSequences chan []string, circular bool) { | ||
var constructs []string | ||
var exists bool | ||
var existingSeqhashes []string | ||
for { | ||
construct, more := <-c | ||
if more { | ||
exists = false | ||
seqhashConstruct, _ := seqhash.Hash(construct, "DNA", circular, true) | ||
// Check if this construct is unique | ||
for _, existingSeqhash := range existingSeqhashes { | ||
if existingSeqhash == seqhashConstruct { | ||
exists = true | ||
} | ||
openConstructs = append(openConstructs, oc...) | ||
infiniteConstructs = append(infiniteConstructs, ic...) | ||
} | ||
if !exists { | ||
constructs = append(constructs, construct) | ||
existingSeqhashes = append(existingSeqhashes, seqhashConstruct) | ||
} | ||
} else { | ||
constructSequences <- constructs | ||
close(constructSequences) | ||
return | ||
} | ||
} | ||
|
||
return openConstructs, infiniteConstructs | ||
} | ||
|
||
// CircularLigate simulates ligation of all possible fragment combinations into circular plasmids. | ||
func CircularLigate(fragments []Fragment) ([]string, []string, error) { | ||
var wg sync.WaitGroup | ||
func CircularLigate(fragments []Fragment) ([]string, []string) { | ||
var outputConstructs []string | ||
var outputInfiniteLoopingConstructs []string | ||
constructs := make(chan string) | ||
infiniteLoopingConstructs := make(chan string) // sometimes we will get stuck in infinite loops. These are sequences with a recursion break | ||
constructSequences := make(chan []string) | ||
infiniteLoopingConstructSequences := make(chan []string) | ||
existingSeqhashes := make(map[string]struct{}) | ||
for _, fragment := range fragments { | ||
wg.Add(1) | ||
go recurseLigate(&wg, constructs, infiniteLoopingConstructs, fragment, fragments, []Fragment{}) | ||
openConstructs, infiniteConstructs := recurseLigate(fragment, fragments, []Fragment{}, existingSeqhashes) | ||
|
||
outputConstructs = append(outputConstructs, openConstructs...) | ||
outputInfiniteLoopingConstructs = append(outputInfiniteLoopingConstructs, infiniteConstructs...) | ||
} | ||
go getConstructs(constructs, constructSequences, true) | ||
go getConstructs(infiniteLoopingConstructs, infiniteLoopingConstructSequences, false) | ||
wg.Wait() | ||
close(constructs) | ||
close(infiniteLoopingConstructs) | ||
outputConstructs = <-constructSequences | ||
outputInfiniteLoopingConstructs = <-infiniteLoopingConstructSequences | ||
return outputConstructs, outputInfiniteLoopingConstructs, nil | ||
return outputConstructs, outputInfiniteLoopingConstructs | ||
} | ||
|
||
/****************************************************************************** | ||
|
@@ -333,14 +319,24 @@ Specific cloning functions begin here. | |
|
||
// GoldenGate simulates a GoldenGate cloning reaction. As of right now, we only | ||
// support BsaI, BbsI, BtgZI, and BsmBI. | ||
func GoldenGate(sequences []Part, enzymeStr string) ([]string, []string, error) { | ||
func (enzymeManager EnzymeManager) GoldenGate(sequences []Part, enzymeStr string) (openConstructs []string, infiniteLoops []string, err error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After learning more about how Golden Gate works IRL and a little bit more about SynBio- probably not. It seems more appropriate for Golden Gate to just accept something like a 'cuttingEnzyme' of type 'Enzyme' and have the caller lookup whatever enzyme they want however they want. This is also nice because now Golden Gate doesn't have to return an error anymore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I personally would like the change of GoldenGate to be its own function, independent of an enzymeManager. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to no longer be a receiver |
||
var fragments []Fragment | ||
for _, sequence := range sequences { | ||
newFragments, err := CutWithEnzymeByName(sequence, true, enzymeStr) | ||
newFragments, err := enzymeManager.CutWithEnzymeByName(sequence, true, enzymeStr) | ||
if err != nil { | ||
return []string{}, []string{}, err | ||
} | ||
fragments = append(fragments, newFragments...) | ||
} | ||
return CircularLigate(fragments) | ||
oc, il := CircularLigate(fragments) | ||
return oc, il, nil | ||
} | ||
|
||
// GetBaseRestrictionEnzymes return a basic slice of common enzymes used in Golden Gate Assembly. Eventually, we want to get the data for this map from ftp://ftp.neb.com/pub/rebase | ||
func GetBaseRestrictionEnzymes() []Enzyme { | ||
return []Enzyme{ | ||
{"BsaI", regexp.MustCompile("GGTCTC"), regexp.MustCompile("GAGACC"), 1, 4, "GGTCTC"}, | ||
{"BbsI", regexp.MustCompile("GAAGAC"), regexp.MustCompile("GTCTTC"), 2, 4, "GAAGAC"}, | ||
{"BtgZI", regexp.MustCompile("GCGATG"), regexp.MustCompile("CATCGC"), 10, 4, "GCGATG"}, | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could there be some explanation of the logic behind this line and what it's doing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that is if the recognition sequence is at the very end of a circular DNA, and then cutting on the other side (ie, the beginning of the sequence).