Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
ahobsonsayers committed Dec 27, 2024
2 parents 2e4e5ad + b57224e commit 2651e89
Show file tree
Hide file tree
Showing 12 changed files with 344 additions and 203 deletions.
14 changes: 6 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,16 @@ and enter the following details:
### Goodreads

- Name: **GoodReads**
- URL: **http://<your_address>:5555/goodreads**
- Authorization Header Value: **<leave this unset>**
- URL: `http://<your_address>:5555/goodreads`
- e.g. `http://192.168.1.100:5555/goodreads`
- Authorization Header Value: **Leave this unset**

### Kindle

- Name: **Kindle**
- URL: **http://<your_address>:5555/kindle/<your_region>**
- Authorization Header Value: **<leave this unset>**
- URL: `http://<your_address>:5555/kindle/<your_region>`
- e.g. `http://192.168.1.100:5555/kindle/uk`
- Authorization Header Value: **Leave this unset**

Region can be one of the following:

Expand All @@ -122,10 +124,6 @@ Region can be one of the following:
- uk - United Kingdom
- us - United States

### Setup video walkthrough:

https://github.com/ahobsonsayers/abs-tract/assets/32173585/54437af6-a17c-4458-bb82-479b183171da

## FAQ

### Why is Goodreads not returning covers?
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/ahobsonsayers/abs-tract
go 1.21

require (
github.com/adrg/strutil v0.3.1
github.com/antchfx/htmlquery v1.3.1
github.com/antchfx/xpath v1.3.0
github.com/deckarep/golang-set/v2 v2.6.0
Expand All @@ -12,7 +13,6 @@ require (
github.com/go-chi/httplog/v2 v2.0.9
github.com/jinzhu/inflection v1.0.0
github.com/k3a/html2text v1.2.1
github.com/lithammer/fuzzysearch v1.1.8
github.com/oapi-codegen/nethttp-middleware v1.0.1
github.com/oapi-codegen/runtime v1.1.1
github.com/orsinium-labs/enum v1.4.0
Expand Down
10 changes: 3 additions & 7 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
github.com/adrg/strutil v0.3.1 h1:OLvSS7CSJO8lBii4YmBt8jiK9QOtB9CzCzwl4Ic/Fz4=
github.com/adrg/strutil v0.3.1/go.mod h1:8h90y18QLrs11IBffcGX3NW/GFBXCMcNg4M7H6MspPA=
github.com/antchfx/htmlquery v1.3.1 h1:wm0LxjLMsZhRHfQKKZscDf2COyH4vDYA3wyH+qZ+Ylc=
github.com/antchfx/htmlquery v1.3.1/go.mod h1:PTj+f1V2zksPlwNt7uVvZPsxpKNa7mlVliCRxLX6Nx8=
github.com/antchfx/xpath v1.3.0 h1:nTMlzGAK3IJ0bPpME2urTuFL76o4A96iYvoKFHRXJgc=
Expand Down Expand Up @@ -52,8 +54,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/matryer/is v1.4.1 h1:55ehd8zaGABKLXQUe2awZ99BD/PTc2ls+KV/dXphgEQ=
Expand Down Expand Up @@ -84,6 +84,7 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
Expand All @@ -96,20 +97,17 @@ golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug=
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand All @@ -125,14 +123,12 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Expand Down
45 changes: 42 additions & 3 deletions goodreads/book.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ import (
)

var (
// These are dirty workarounds, but they seem to work

// alternativeCoverRegex matches a string that consists entirely of a single
// <i>...</i> element (optionally surrounded by whitespace) whose text mentions
// "alternate cover" or "alternative cover", with a lower- or upper-case "A".
alternativeCoverRegex = regexp.MustCompile(`^\s*<i>.*[Aa]lternat(iv)?e cover.*</i>\s*$`)
// breakTagRegex matches HTML line-break tags: <br>, <br/> and <br />.
breakTagRegex = regexp.MustCompile(`<br\s*/?>`)
// lastBracketRegex matches a string containing a parenthesised group that is
// not followed by any further brackets. Capture groups: (1) everything before
// the group, (2) the group including its brackets, (3) everything after it.
lastBracketRegex = regexp.MustCompile(`^(.*)(\([^\(\)]*\))([^()]*)$`)
)

Expand Down Expand Up @@ -65,7 +67,7 @@ func (b *Book) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
}

type Work struct {
Title string `xml:"original_title"`
FullTitle string `xml:"original_title"`
MediaType string `xml:"media_type"`
EditionsCount int `xml:"books_count"`

Expand All @@ -81,6 +83,23 @@ type Work struct {
RatingDistribution string `xml:"rating_dist"`
}

// Title returns the work's full title with any subtitle stripped.
// Everything from the first ':' onwards is treated as the subtitle; the
// remaining text is returned with surrounding whitespace trimmed.
func (w Work) Title() string {
	mainTitle, _, _ := strings.Cut(w.FullTitle, ":")
	return strings.TrimSpace(mainTitle)
}

// Subtitle returns the subtitle portion of the work's full title.
// The subtitle is everything after the first ':' with surrounding whitespace
// trimmed. An empty string is returned when the full title has no ':'.
func (w Work) Subtitle() string {
	if _, rest, found := strings.Cut(w.FullTitle, ":"); found {
		return strings.TrimSpace(rest)
	}
	return ""
}

func (w Work) AverageRating() float64 {
averageRating := float64(w.RatingsSum) / float64(w.RatingsCount)
return math.Round(averageRating*100) / 100 // Round to two decimal places
Expand All @@ -89,7 +108,7 @@ func (w Work) AverageRating() float64 {
type Edition struct {
Id string `xml:"id"`
ISBN *string `xml:"isbn13"`
Title string `xml:"title"`
FullTitle string `xml:"title"`
Description string `xml:"description"`
NumPages string `xml:"num_pages"`
ImageURL string `xml:"image_url"`
Expand All @@ -103,11 +122,31 @@ type Edition struct {
Language string `xml:"language_code"`
}

// Title returns the edition's full title with any subtitle stripped.
// Everything from the first ':' onwards is treated as the subtitle; the
// remaining text is returned with surrounding whitespace trimmed.
func (e Edition) Title() string {
	mainTitle, _, _ := strings.Cut(e.FullTitle, ":")
	return strings.TrimSpace(mainTitle)
}

// Subtitle returns the subtitle portion of the edition's full title.
// The subtitle is everything after the first ':' with surrounding whitespace
// trimmed. An empty string is returned when the full title has no ':'.
func (e Edition) Subtitle() string {
	if _, rest, found := strings.Cut(e.FullTitle, ":"); found {
		return strings.TrimSpace(rest)
	}
	return ""
}

func (e *Edition) Sanitise() {
// Description can sometimes be html and contain preamble about alternative covers
// Break tags need to be specially handled to add new lines as html2text does
// not convert them to new lines properly
description := strings.TrimSpace(e.Description)
description = alternativeCoverRegex.ReplaceAllString(description, "")
description = html2text.HTML2Text(description)
description = breakTagRegex.ReplaceAllString(description, "\n")
description = html2text.HTML2TextWithOptions(description, html2text.WithUnixLineBreaks())
e.Description = description

// Get original cover image by cleaning the URL
Expand Down
164 changes: 1 addition & 163 deletions goodreads/goodreads.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@ import (
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"sync"

"github.com/ahobsonsayers/abs-tract/utils"
"github.com/lithammer/fuzzysearch/fuzzy"
"github.com/orsinium-labs/enum"
"github.com/samber/lo"
)
Expand All @@ -32,11 +29,6 @@ var (
goodreadsUrl: utils.CloneURL(defaultGoodreadsUrl),
apiKey: DefaultAPIKey,
}

bookSearchTypeEnum = enum.NewBuilder[string, BookSearchType]()
BookSearchTypeTitle = bookSearchTypeEnum.Add(BookSearchType{"title"})
BookSearchTypeAuthor = bookSearchTypeEnum.Add(BookSearchType{"author"})
BookSearchTypes = bookSearchTypeEnum.Enum()
)

// BookSearchType is an enum member backed by a string. Its Value is sent to
// Goodreads as the "search[field]" query parameter to choose which field a
// search is matched against.
type BookSearchType enum.Member[string]
Expand Down Expand Up @@ -150,7 +142,7 @@ func (c *Client) GetBooksByIds(ctx context.Context, bookIds []string) ([]Book, e
// Only return books whose work has a title
validBooks := make([]Book, 0, len(books))
for _, book := range books {
if book.Work.Title != "" {
if book.Work.FullTitle != "" {
validBooks = append(validBooks, book)
}
}
Expand All @@ -176,157 +168,3 @@ func (c *Client) GetBookByTitle(ctx context.Context, bookTitle string, bookAutho

return result.Work, nil
}

// SearchBooks searches Goodreads for books matching bookTitle and returns the
// full details of every match. Supplying bookAuthor (non-nil and non-empty)
// switches to an author-driven search, which can give better results.
// https://www.goodreads.com/api/index#search.books
func (c *Client) SearchBooks(ctx context.Context, bookTitle string, bookAuthor *string) ([]Book, error) {
	var overviews []BookOverview

	if bookAuthor != nil && *bookAuthor != "" {
		// With an author, query goodreads by author ONLY and then pick out the
		// wanted title from the author's books ourselves. Searching by title AND
		// author together (the 'all' search type) is deliberately avoided:
		// goodreads returns awful results for it, sometimes none at all.
		byAuthor, err := c.searchBooksManyPages(ctx, searchBooksManyPagesInput{
			Query:      *bookAuthor,
			SearchType: BookSearchTypeAuthor,
			Page:       1,
			NumPages:   15,
		})
		if err != nil {
			return nil, err
		}

		// Keep only the author's books whose title (fuzzily) matches the request.
		ranks := fuzzy.RankFindNormalizedFold(bookTitle, BookTitles(byAuthor))

		// Collect the matched overviews, keeping the order goodreads returned them in.
		overviews = make([]BookOverview, 0, len(ranks))
		for _, rank := range ranks {
			overviews = append(overviews, byAuthor[rank.OriginalIndex])
		}
	} else {
		// Without an author, a plain title search is all we can do.
		byTitle, err := c.searchBooksManyPages(ctx, searchBooksManyPagesInput{
			Query:      bookTitle,
			SearchType: BookSearchTypeTitle,
			Page:       1,
			NumPages:   5,
		})
		if err != nil {
			return nil, err
		}
		overviews = byTitle
	}

	// Search results are only overviews; resolve them to full book details by id.
	books, err := c.GetBooksByIds(ctx, BookIds(overviews))
	if err != nil {
		return nil, err
	}
	return books, nil
}

// searchBooksSinglePageInput holds the arguments for searchBooksSinglePage.
type searchBooksSinglePageInput struct {
// Query is the search text. It is whitespace-trimmed before use; an empty
// query yields no results.
Query string
// SearchType selects the field to search on. Values not in BookSearchTypes
// fall back to a title search.
SearchType BookSearchType
// Page is the results page to request. Values below 1 are treated as 1.
Page int
}

// searchBooksSinglePage runs a goodreads book search and returns one page of
// sanitised results.
//   - A blank Query (after trimming whitespace) performs no request and returns nil.
//   - An unset or unrecognised SearchType falls back to a title search.
//   - A Page below 1 returns the first page.
func (c *Client) searchBooksSinglePage(ctx context.Context, input searchBooksSinglePageInput) ([]BookOverview, error) {
	query := strings.TrimSpace(input.Query)
	if query == "" {
		return nil, nil
	}

	searchType := input.SearchType
	if !BookSearchTypes.Contains(searchType) {
		// Unknown search types degrade to a title search
		searchType = BookSearchTypeTitle
	}

	page := input.Page
	if page < 1 {
		page = 1
	}

	var response struct {
		Books []BookOverview `xml:"search>results>work>best_book"`
	}
	params := map[string]string{
		"q":             query,
		"search[field]": searchType.Value,
		"page":          strconv.Itoa(page),
	}
	if err := c.get(ctx, "search/index.xml", params, &response); err != nil {
		return nil, err
	}

	// Sanitise a copy of every result before handing it back
	sanitised := make([]BookOverview, len(response.Books))
	for i, overview := range response.Books {
		overview.Sanitise()
		sanitised[i] = overview
	}

	return sanitised, nil
}

// searchBooksManyPagesInput holds the arguments for searchBooksManyPages.
type searchBooksManyPagesInput struct {
// Query is the search text, passed through to each single-page search.
Query string
// SearchType selects the field to search on, passed through to each
// single-page search.
SearchType BookSearchType
// Page is the first results page to request. Values below 1 are treated as 1.
Page int
// NumPages is how many consecutive pages to request, starting at Page.
// Values below 1 are treated as 1.
NumPages int
}

// searchBooksManyPages searches for books across NumPages consecutive result
// pages, starting at Page, and returns the results flattened in page order.
// The pages are requested concurrently. Query and SearchType behave as in
// searchBooksSinglePage. A Page or NumPages below 1 is treated as 1.
// Errors from individual pages are joined into the returned error; results of
// the pages that succeeded are still returned alongside it.
func (c *Client) searchBooksManyPages(ctx context.Context, input searchBooksManyPagesInput) ([]BookOverview, error) {
	firstPage := input.Page
	if firstPage < 1 {
		firstPage = 1
	}
	pageCount := input.NumPages
	if pageCount < 1 {
		pageCount = 1
	}

	var (
		wg     sync.WaitGroup
		mu     sync.Mutex // guards results and joined
		joined error
	)
	// One slot per page so results keep their page order regardless of which
	// request finishes first.
	results := make([][]BookOverview, pageCount)

	wg.Add(pageCount)
	for offset := 0; offset < pageCount; offset++ {
		go func(offset int) {
			defer wg.Done()

			overviews, err := c.searchBooksSinglePage(ctx, searchBooksSinglePageInput{
				Query:      input.Query,
				SearchType: input.SearchType,
				Page:       firstPage + offset,
			})

			mu.Lock()
			defer mu.Unlock()
			if err != nil {
				joined = errors.Join(joined, err)
				return
			}
			results[offset] = overviews
		}(offset)
	}
	wg.Wait()

	return lo.Flatten(results), joined
}
Loading

0 comments on commit 2651e89

Please sign in to comment.