From ae4bc4e3cb1519eaf87248d7c7eae40f35562dc0 Mon Sep 17 00:00:00 2001 From: Filip Borkiewicz Date: Mon, 13 Nov 2023 11:40:09 -0600 Subject: [PATCH] Fix tweet content generation (#43) - Fix strings without spaces not being included in the tweet. The splitting mechanism is now more naive: it ignores spaces when truncating, but it should be less error-prone. - Fix link detection not accepting slashes as part of links. --- service/domain/content/lexer.go | 20 ++++++++++---------- service/domain/content/lexer_test.go | 10 ++++++++++ service/domain/tweet_generator.go | 18 +++++++++--------- service/domain/tweet_generator_test.go | 24 +++++++++++++++++++++++- 4 files changed, 52 insertions(+), 20 deletions(-) diff --git a/service/domain/content/lexer.go b/service/domain/content/lexer.go index f560280..9c2a623 100644 --- a/service/domain/content/lexer.go +++ b/service/domain/content/lexer.go @@ -8,6 +8,15 @@ import ( "github.com/boreq/errors" ) +const ( + httpColonSlashSlash = "http://" + httpsColonSlashSlash = "https://" + nostrColon = "nostr:" + nevent = "nevent" + npub = "npub" + note = "note" +) + type Token struct { Type TokenType Text string @@ -168,15 +177,6 @@ func (l *Lexer) mergeConsecutiveTexts(tokens []Token) []Token { type stateFn func(l *Lexer) (stateFn, error) -const ( - httpColonSlashSlash = "http://" - httpsColonSlashSlash = "https://" - nostrColon = "nostr:" - nevent = "nevent" - npub = "npub" - note = "note" -) - func stateText(l *Lexer) (stateFn, error) { for { if l.comesNext(httpColonSlashSlash) || l.comesNext(httpsColonSlashSlash) { @@ -295,7 +295,7 @@ func stateNostrLinkData(l *Lexer) (stateFn, error) { } func isValidLinkCharacterExcludingDot(r rune) bool { - return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '%' + return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '%' || r == '/' } func isBech32(r rune) bool { diff --git a/service/domain/content/lexer_test.go b/service/domain/content/lexer_test.go index 0196bc1..a643036 100644 --- 
a/service/domain/content/lexer_test.go +++ b/service/domain/content/lexer_test.go @@ -133,6 +133,16 @@ func TestLexer(t *testing.T) { }, }, }, + { + Name: "only_link_with_path", + In: `https://example.com/path`, + Out: []content.Token{ + { + Type: content.TokenTypeLink, + Text: "https://example.com/path", + }, + }, + }, } for _, testCase := range testCases { diff --git a/service/domain/tweet_generator.go b/service/domain/tweet_generator.go index a5560c2..a146b6b 100644 --- a/service/domain/tweet_generator.go +++ b/service/domain/tweet_generator.go @@ -3,6 +3,7 @@ package domain import ( "fmt" "strings" + "unicode/utf8" "github.com/boreq/errors" "github.com/planetary-social/nos-crossposting-service/service/domain/content" @@ -70,17 +71,16 @@ func (g *TweetGenerator) createContent(builder *strings.Builder, elements []cont builder.WriteString(element.Text) case content.ElementTypeText: - for i, part := range strings.Split(element.Text, " ") { - futureTotalLength := builder.Len() + len(part) - if futureTotalLength > noteContentMaxLengthInRunes { - builder.WriteString(ellipsis) + remainingLen := noteContentMaxLengthInRunes - utf8.RuneCountInString(builder.String()) + numberOfWrittenRunes := 0 + for _, r := range element.Text { + builder.WriteRune(r) + numberOfWrittenRunes++ + + if numberOfWrittenRunes >= remainingLen { + builder.WriteString("...") return nil } - - if i > 0 { - builder.WriteString(" ") - } - builder.WriteString(part) } default: return errors.New("unknown element") diff --git a/service/domain/tweet_generator_test.go b/service/domain/tweet_generator_test.go index e49f904..c4cd865 100644 --- a/service/domain/tweet_generator_test.go +++ b/service/domain/tweet_generator_test.go @@ -29,6 +29,17 @@ func TestTweetGenerator(t *testing.T) { }, ExpectedContent: "Some text.", }, + { + Name: "not_a_reply_long_without_spaces", + Event: nostr.Event{ + Kind: domain.EventKindNote.Int(), + Tags: []nostr.Tag{ + []string{"p", fixtures.SomePublicKey().Hex()}, + }, + Content: 
strings.Repeat("a", 300), + }, + ExpectedContent: strings.Repeat("a", 200) + "...", + }, { Name: "not_a_reply_long", Event: nostr.Event{ @@ -38,7 +49,18 @@ func TestTweetGenerator(t *testing.T) { }, Content: strings.Repeat("Some text. ", 100), }, - ExpectedContent: "Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text....", + ExpectedContent: "Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. Some text. So...", + }, + { + Name: "not_a_reply_huge_link", + Event: nostr.Event{ + Kind: domain.EventKindNote.Int(), + Tags: []nostr.Tag{ + []string{"p", fixtures.SomePublicKey().Hex()}, + }, + Content: "https://example.com/" + strings.Repeat("a", 300), + }, + ExpectedContent: "...", }, { Name: "reply",