Skip to content

Commit

Permalink
Kindle: Go back to using digital-text, but improve code handling
Browse files Browse the repository at this point in the history
  • Loading branch information
ahobsonsayers committed Dec 28, 2024
1 parent 4e40b8a commit 5c2339a
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 22 deletions.
46 changes: 27 additions & 19 deletions kindle/book.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,54 +7,61 @@ import (
"github.com/ahobsonsayers/abs-tract/utils"
"github.com/antchfx/htmlquery"
"github.com/antchfx/xpath"
mapset "github.com/deckarep/golang-set/v2"
"golang.org/x/net/html"
)

const publishDateLayout = "Jan 2, 2006"

var (
bookCoverSetExpr = xpath.MustCompile(`.//img/@srcset`)
bookFormatExpr = xpath.MustCompile(`.//a[contains(text(), "Kindle Edition")]//text()`)
bookFormatExpr = xpath.MustCompile(`.//a[matches(., "Kindle|Hardcover|Paperback")]//text()`)
bookInfoExpr = xpath.MustCompile(`.//div[contains(@class, "a-color-secondary")]`)
bookTitleExpr = xpath.MustCompile(`.//h2`)
searchResultsExpr = xpath.MustCompile(`//div[contains(@class, "s-result-list")]//div[@data-index and @data-asin]`)
)

// Book is a single book parsed from one result on a search results page.
type Book struct {
// ASIN is the value of the result node's data-asin attribute.
ASIN string
// Format is the lowercased text of the result's format link
// (the link matched against "Kindle|Hardcover|Paperback").
Format string
// Title is the book title.
// NOTE(review): presumably taken from the result's h2 element - confirm in bookTitle.
Title string
// Author is the book author.
Author string
// Cover is the book cover image.
// NOTE(review): presumably a URL picked from the img srcset - confirm.
Cover string
// PublishDate is the publish date of the book.
// NOTE(review): presumably nil when no date could be parsed - confirm.
PublishDate *time.Time
}

// BooksFromHTML parses and returns the books from the html of a search results page
// BooksFromHTML parses the books from the html of a search results page.
func BooksFromHTML(searchNode *html.Node) ([]Book, error) {
resultNodes := htmlquery.QuerySelectorAll(searchNode, searchResultsExpr)

books := make([]Book, 0, len(resultNodes))
seenAsins := mapset.NewSet[string]()
for _, resultNode := range resultNodes {
if !isKindleBook(resultNode) {
continue
}

// Attempt to parse result as a book, recording it if it
// could be parsed and the asin has not already been seen
book := BookFromHTML(resultNode)
if book != nil {
if book != nil && !seenAsins.Contains(book.ASIN) {
books = append(books, *book)
seenAsins.Add(book.ASIN)
}
}

return books, nil
}

// BookFromHTML parses and returns a book from the html
// of a book result on the search results page
// BookFromHTML parses a book from the html of a result on the search results page.
// If a result is not for a book, nil is returned.
func BookFromHTML(bookNode *html.Node) *Book {
asin := bookAsin(bookNode)
if asin == "" {
return nil
}

format := bookFormat(bookNode)
if format == "" {
return nil
}

title := bookTitle(bookNode)
if title == "" {
return nil
Expand All @@ -65,27 +72,28 @@ func BookFromHTML(bookNode *html.Node) *Book {

return &Book{
ASIN: asin,
Format: format,
Title: title,
Author: author,
Cover: cover,
PublishDate: publishDate,
}
}

func isKindleBook(bookNode *html.Node) bool {
// bookAsin gets the book ASIN from the node's data-asin attribute.
func bookAsin(bookNode *html.Node) string {
return htmlquery.SelectAttr(bookNode, "data-asin")
}

// bookFormat gets the book format in lowercase, or "" if no format is found.
func bookFormat(bookNode *html.Node) string {
bookFormatNode := htmlquery.QuerySelector(bookNode, bookFormatExpr)
if bookFormatNode == nil {
return false
return ""
}
bookFormatNodeValue := htmlquery.InnerText(bookFormatNode)

bookFormat := strings.ToLower(bookFormatNodeValue)
return strings.Contains(bookFormat, "kindle")
}

// bookAsin gets the book ASIN from the node's data-asin attribute.
func bookAsin(bookNode *html.Node) string {
return htmlquery.SelectAttr(bookNode, "data-asin")
bookFormatNodeValue := htmlquery.InnerText(bookFormatNode)
return strings.ToLower(bookFormatNodeValue)
}

// bookTitle gets the book title.
Expand Down
3 changes: 2 additions & 1 deletion kindle/kindle.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ func (c *Client) get(

func (c *Client) Search(ctx context.Context, title string, author *string) ([]Book, error) {
parameters := map[string]string{
"i": "stripbooks",
"i": "digital-text",
// "i": "stripbooks",
"k": title,
}
if author != nil && *author != "" {
Expand Down
8 changes: 6 additions & 2 deletions kindle/kindle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@ const (
)

func TestSearchBook(t *testing.T) {
t.Skip("Fails in CI")
// t.Skip("Fails in CI")
// Should return https://www.amazon.com/dp/B007978NU6
books, err := kindle.DefaultClient.Search(context.Background(), TheHobbitTitle, lo.ToPtr(TheHobbitAuthor))
books, err := kindle.DefaultClient.Search(
context.Background(),
TheHobbitTitle,
lo.ToPtr(TheHobbitAuthor),
)
require.NoError(t, err)
require.NotEmpty(t, books)

Expand Down

0 comments on commit 5c2339a

Please sign in to comment.