-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
backend: move chunking to llm package
- Loading branch information
1 parent
fb1f88a
commit caf0d10
Showing
4 changed files
with
65 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package llms | ||
|
||
import ( | ||
"strings" | ||
) | ||
|
||
// GenerateChunks splits input into sentence-sized chunks, treating "." as the
// sentence terminator.
//
// Leading and trailing whitespace of the whole input is removed first. Each
// chunk then has its leading spaces (only ' ') stripped and a "." re-appended;
// newlines, tabs and trailing spaces inside a chunk are kept as-is. Text that
// follows the final period is returned as a last chunk with a "." added.
// Segments with no text at all (empty or whitespace-only, such as those
// between consecutive periods) are dropped. The returned slice is never nil.
func GenerateChunks(input string) []string {
	input = strings.TrimSpace(input)

	// Each "." ends at most one chunk, plus one slot for an unterminated tail.
	results := make([]string, 0, strings.Count(input, ".")+1)

	for _, sentence := range strings.Split(input, ".") {
		// A segment holding only whitespace carries no content; emitting it
		// would produce a chunk such as "\n." that is nothing but a period.
		if strings.TrimSpace(sentence) == "" {
			continue
		}

		// Strip only leading spaces so that a paragraph break ("\n\n") in
		// front of a sentence stays attached to its chunk.
		results = append(results, strings.TrimLeft(sentence, " ")+".")
	}

	return results
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package llms | ||
|
||
import ( | ||
"log" | ||
"testing" | ||
) | ||
|
||
func TestChunkCardBody(t *testing.T) { | ||
input := `Lorem ipsum odor amet, consectetuer adipiscing elit. Luctus egestas lobortis cursus mollis facilisi. Scelerisque vel litora rhoncus porttitor eros. Lacus orci morbi a varius lobortis rutrum interdum per. Nostra commodo phasellus etiam morbi metus porttitor. Mauris a fermentum habitasse sollicitudin semper porta. Fermentum phasellus hendrerit purus, etiam erat litora. | ||
Lorem cubilia cubilia dis iaculis, odio vivamus interdum adipiscing dolor.` | ||
|
||
results := GenerateChunks(input) | ||
for _, result := range results { | ||
log.Printf(result) | ||
} | ||
|
||
if len(results) != 8 { | ||
t.Errorf("wrong number of chunks returned, got %v want %v", len(results), 8) | ||
} | ||
string := "Fermentum phasellus hendrerit purus, etiam erat litora." | ||
if len(results) > 6 && results[6] != string { | ||
t.Errorf("wrong chunk return, %v, got %v want %v", results[6] == string, results[6], string) | ||
t.Errorf("one: %v", results[6]) | ||
t.Errorf("two: %v", string) | ||
|
||
} | ||
last := "\n\nLorem cubilia cubilia dis iaculis, odio vivamus interdum adipiscing dolor." | ||
if len(results) > 7 && results[7] != last { | ||
t.Errorf("wrong chunk return, got %v want %v", results[7], last) | ||
|
||
} | ||
} |