Skip to content

Commit

Permalink
extract
Browse files Browse the repository at this point in the history
  • Loading branch information
TwFlem committed Dec 6, 2023
1 parent 2d09fa9 commit bb3cf93
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 28 deletions.
37 changes: 35 additions & 2 deletions bwt/bwt.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,30 @@ func (bwt BWT) Locate(pattern string) []int {
return offsets
}

// Extract returns the characters of the original string in the half-open
// range [start, end), rebuilt from the index: each position is mapped to its
// row in the suffix array, and the char of the skip list entry whose interval
// contains that row is appended to the result.
//
// Extract returns "" when start >= end. It panics when end is greater than
// the value reported by getLenOfOriginalString, or when start is negative.
//
// TODO: do we want to ignore the $?
func (bwt BWT) Extract(start, end int) string {
	if end > bwt.getLenOfOriginalString() {
		panic("bwt: Extract end is past the end of the original string")
	}
	// An empty or reversed range yields no characters.
	if start >= end {
		return ""
	}
	// Reject a negative start up front instead of scanning the whole suffix
	// array only to fail inside the lookup helper.
	if start < 0 {
		panic("bwt: Extract start must not be negative")
	}

	var strB strings.Builder
	strB.Grow(end - start) // exactly one byte is written per position
	for i := start; i < end; i++ {
		fPos := bwt.reverseCharacterLookup(i)
		skip := bwt.lookupSkipByOffset(fPos)
		strB.WriteByte(skip.char)
	}
	return strB.String()
}

// reverseCharacterLookup returns the index of the first entry in
// bwt.suffixArray that equals originalPos, found with a linear scan.
//
// It panics if no entry equals originalPos.
func (bwt BWT) reverseCharacterLookup(originalPos int) int {
	for row, pos := range bwt.suffixArray {
		if pos == originalPos {
			return row
		}
	}
	panic("bwt: position not found in suffix array")
}

func (bwt BWT) lfSearch(pattern string) interval {
searchRange := interval{start: 0, end: bwt.getLenOfOriginalString()}
for i := 0; i < len(pattern); i++ {
Expand All @@ -47,7 +71,7 @@ func (bwt BWT) lfSearch(pattern string) interval {
}

c := pattern[len(pattern)-1-i]
skip, ok := bwt.lookupSkip(c)
skip, ok := bwt.lookupSkipByChar(c)
if !ok {
return interval{}
}
Expand All @@ -57,7 +81,7 @@ func (bwt BWT) lfSearch(pattern string) interval {
return searchRange
}

func (bwt BWT) lookupSkip(c byte) (entry skipEntry, ok bool) {
func (bwt BWT) lookupSkipByChar(c byte) (entry skipEntry, ok bool) {
for i := range bwt.skipList {
if bwt.skipList[i].char == c {
return bwt.skipList[i], true
Expand All @@ -66,6 +90,15 @@ func (bwt BWT) lookupSkip(c byte) (entry skipEntry, ok bool) {
return skipEntry{}, false
}

// lookupSkipByOffset returns the first entry of bwt.skipList whose
// openEndedInterval contains offset, that is start <= offset < end.
//
// It panics if no entry's interval contains offset.
func (bwt BWT) lookupSkipByOffset(offset int) skipEntry {
	for i := range bwt.skipList {
		bounds := bwt.skipList[i].openEndedInterval
		if bounds.start <= offset && offset < bounds.end {
			return bwt.skipList[i]
		}
	}
	panic("bwt: offset is not covered by any skip list interval")
}

// getLenOfOriginalString returns the end of the open-ended interval of the
// last entry in bwt.skipList. Extract and lfSearch use this value as the
// exclusive upper bound of valid positions.
//
// It returns 0 when the skip list is empty instead of indexing out of range.
func (bwt BWT) getLenOfOriginalString() int {
	if len(bwt.skipList) == 0 {
		return 0
	}
	last := bwt.skipList[len(bwt.skipList)-1]
	return last.openEndedInterval.end
}
Expand Down
96 changes: 70 additions & 26 deletions bwt/bwt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,36 +77,80 @@ func TestBWT_Locate(t *testing.T) {
}
}

func BenchmarkBWTBuildPower12(b *testing.B) {
base := "!BANANA!"
BaseBenchmarkBWTBuild(base, 12, b)
// BWTExtractTestCase is one row of the TestBWT_Extract table: the start and
// end arguments passed to Extract and the string the call must return.
type BWTExtractTestCase struct {
	start, end int    // arguments for Extract(start, end)
	expected   string // value Extract must return for that range
}

// BaseBenchmarkBWTBuild calls buildBWTForBench(base, power) b.N times and
// discards every result.
//
//go:noinline
func BaseBenchmarkBWTBuild(base string, power int, b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		buildBWTForBench(base, power)
	}
}

func buildBWTForBench(base string, power int) BWT {
test := base
for i := 0; i < power; i++ {
test += test
}
func TestBWT_Extract(t *testing.T) {
baseTestStr := "thequickbrownfoxjumpsoverthelazydogwithanovertfrownafterfumblingitsparallelogramshapedbananagramallarounddowntown" // len == 113
testStr := strings.Join([]string{baseTestStr, baseTestStr, baseTestStr}, "")

return New(test)
}
bwt := New(testStr)

func BenchmarkBWTQueryPower12(b *testing.B) {
base := "!BANANA!"
bwt := buildBWTForBench(base, 12)
BaseBenchmarkBWTQuery(bwt, "ANANABANANA", b)
}
testTable := []BWTExtractTestCase{
{4, 8, "uick"},
{117, 121, "uick"},
{230, 234, "uick"},
{0, 3, "the"},
{25, 28, "the"},
{113, 116, "the"},
{138, 141, "the"},
{226, 229, "the"},
{251, 254, "the"},
{21, 25, "over"},
{41, 45, "over"},
{134, 138, "over"},
{154, 158, "over"},
{247, 251, "over"},
{267, 271, "over"},
{10, 13, "own"},
{48, 51, "own"},
{106, 109, "own"},
{123, 126, "own"},
{161, 164, "own"},
{219, 222, "own"},
{223, 226, "own"},
{236, 239, "own"},
{274, 277, "own"},
{332, 335, "own"},
{336, 339, "own"},
{87, 90, "ana"},
{89, 92, "ana"},
{200, 203, "ana"},
{202, 205, "ana"},
{313, 316, "ana"},
{315, 318, "ana"},
{39, 41, "an"},
{87, 89, "an"},
{152, 154, "an"},
{200, 202, "an"},
{202, 204, "an"},
{265, 267, "an"},
{313, 315, "an"},
{50, 52, "na"},
{88, 90, "na"},
{163, 165, "na"},
{201, 203, "na"},
{203, 205, "na"},
{276, 278, "na"},
{314, 316, "na"},
{316, 318, "na"},
{9, 13, "rown"},
{47, 51, "rown"},
{122, 126, "rown"},
{160, 164, "rown"},
{235, 239, "rown"},
{273, 277, "rown"},
{109, 116, "townthe"},
{222, 229, "townthe"},
}

//go:noinline
func BaseBenchmarkBWTQuery(bwt BWT, seq string, b *testing.B) {
for n := 0; n < b.N; n++ {
bwt.Count(seq)
for _, v := range testTable {
str := bwt.Extract(v.start, v.end)
if str != v.expected {
t.Fatalf("extractRange=(%d, %d) expected=%s actual=%s", v.start, v.end, v.expected, str)
}
}
}

0 comments on commit bb3cf93

Please sign in to comment.