From 5975b9140965fb89d8bbac4d6fb9bbfe89d8887c Mon Sep 17 00:00:00 2001
From: hyponet
Date: Sat, 30 Sep 2023 18:29:56 +0800
Subject: [PATCH] feat: docloader plugin
Signed-off-by: hyponet
---
go.mod | 19 +-
go.sum | 61 +-
pkg/dentry/group.go | 8 +-
pkg/dentry/group_test.go | 8 +-
pkg/plugin/buildin/docloader/csv.go | 80 +
pkg/plugin/buildin/docloader/docloader.go | 102 +
pkg/plugin/buildin/docloader/epub.go | 17 +
pkg/plugin/buildin/docloader/html.go | 66 +
pkg/plugin/buildin/docloader/pdf.go | 107 +
pkg/plugin/buildin/docloader/plaintext.go | 56 +
pkg/plugin/buildin/rss.go | 8 +-
pkg/plugin/mirror.go | 46 +-
pkg/plugin/pluginapi/consts.go | 28 +
pkg/plugin/{stub => pluginapi}/entry.go | 2 +-
pkg/plugin/{stub => pluginapi}/process.go | 14 +-
pkg/plugin/process.go | 37 +-
pkg/plugin/registry.go | 2 +-
pkg/plugin/source.go | 14 +-
pkg/types/friday.go | 22 +
pkg/workflow/exec/consts.go | 9 -
pkg/workflow/exec/executor.go | 52 +-
pkg/workflow/mirrordir.go | 40 +-
.../PuerkitoBio/goquery/.gitattributes | 1 +
.../github.com/PuerkitoBio/goquery/.gitignore | 16 +
vendor/github.com/PuerkitoBio/goquery/LICENSE | 12 +
.../github.com/PuerkitoBio/goquery/README.md | 198 +
.../github.com/PuerkitoBio/goquery/array.go | 124 +
vendor/github.com/PuerkitoBio/goquery/doc.go | 123 +
.../github.com/PuerkitoBio/goquery/expand.go | 70 +
.../github.com/PuerkitoBio/goquery/filter.go | 163 +
.../PuerkitoBio/goquery/iteration.go | 39 +
.../PuerkitoBio/goquery/manipulation.go | 679 ++
.../PuerkitoBio/goquery/property.go | 275 +
.../github.com/PuerkitoBio/goquery/query.go | 49 +
.../PuerkitoBio/goquery/traversal.go | 698 ++
vendor/github.com/PuerkitoBio/goquery/type.go | 203 +
.../PuerkitoBio/goquery/utilities.go | 178 +
.../andybalholm/cascadia/.travis.yml | 14 +
.../github.com/andybalholm/cascadia/LICENSE | 24 +
.../github.com/andybalholm/cascadia/README.md | 144 +
.../github.com/andybalholm/cascadia/parser.go | 889 ++
.../andybalholm/cascadia/pseudo_classes.go | 458 +
.../andybalholm/cascadia/selector.go | 586 ++
.../andybalholm/cascadia/serialize.go | 176 +
.../andybalholm/cascadia/specificity.go | 26 +
vendor/github.com/aymerick/douceur/LICENSE | 22 +
.../aymerick/douceur/css/declaration.go | 60 +
.../github.com/aymerick/douceur/css/rule.go | 230 +
.../aymerick/douceur/css/stylesheet.go | 25 +
.../aymerick/douceur/parser/parser.go | 409 +
vendor/github.com/go-shiori/dom/.gitignore | 2 +
vendor/github.com/go-shiori/dom/.travis.yml | 11 +
vendor/github.com/go-shiori/dom/LICENSE | 21 +
vendor/github.com/go-shiori/dom/README.md | 22 +
vendor/github.com/go-shiori/dom/dom.go | 622 ++
vendor/github.com/go-shiori/dom/parser.go | 61 +
.../go-shiori/go-readability/.gitattributes | 1 +
.../go-shiori/go-readability/.gitignore | 5 +
.../go-shiori/go-readability/.travis.yml | 16 +
.../go-shiori/go-readability/LICENSE | 21 +
.../go-shiori/go-readability/README.md | 169 +
.../go-shiori/go-readability/parser-check.go | 79 +
.../go-shiori/go-readability/parser-parse.go | 129 +
.../go-shiori/go-readability/parser.go | 2300 +++++
.../go-shiori/go-readability/readability.go | 77 +
.../go-shiori/go-readability/utils.go | 100 +
vendor/github.com/gogs/chardet/2022.go | 102 +
vendor/github.com/gogs/chardet/AUTHORS | 1 +
vendor/github.com/gogs/chardet/LICENSE | 22 +
vendor/github.com/gogs/chardet/README.md | 12 +
vendor/github.com/gogs/chardet/detector.go | 147 +
.../github.com/gogs/chardet/icu-license.html | 51 +
vendor/github.com/gogs/chardet/multi_byte.go | 345 +
vendor/github.com/gogs/chardet/recognizer.go | 83 +
vendor/github.com/gogs/chardet/single_byte.go | 882 ++
vendor/github.com/gogs/chardet/unicode.go | 103 +
vendor/github.com/gogs/chardet/utf8.go | 71 +
vendor/github.com/gorilla/css/LICENSE | 27 +
vendor/github.com/gorilla/css/scanner/doc.go | 33 +
.../github.com/gorilla/css/scanner/scanner.go | 356 +
.../github.com/hyponet/webpage-packer/LICENSE | 201 +
.../webpage-packer/packer/clutterfree.go | 79 +
.../hyponet/webpage-packer/packer/html.go | 119 +
.../webpage-packer/packer/interface.go | 8 +
.../hyponet/webpage-packer/packer/option.go | 21 +
.../hyponet/webpage-packer/packer/utils.go | 64 +
.../webpage-packer/packer/webarchive.go | 268 +
vendor/github.com/ledongthuc/pdf/LICENSE | 27 +
vendor/github.com/ledongthuc/pdf/README.md | 138 +
vendor/github.com/ledongthuc/pdf/ascii85.go | 53 +
vendor/github.com/ledongthuc/pdf/lex.go | 529 ++
vendor/github.com/ledongthuc/pdf/name.go | 4286 +++++++++
vendor/github.com/ledongthuc/pdf/page.go | 1050 +++
vendor/github.com/ledongthuc/pdf/ps.go | 138 +
vendor/github.com/ledongthuc/pdf/read.go | 1112 +++
vendor/github.com/ledongthuc/pdf/text.go | 158 +
.../microcosm-cc/bluemonday/.coveralls.yml | 1 +
.../microcosm-cc/bluemonday/.editorconfig | 4 +
.../microcosm-cc/bluemonday/.gitattributes | 1 +
.../microcosm-cc/bluemonday/.gitignore | 15 +
.../microcosm-cc/bluemonday/.travis.yml | 26 +
.../microcosm-cc/bluemonday/CONTRIBUTING.md | 52 +
.../microcosm-cc/bluemonday/CREDITS.md | 8 +
.../microcosm-cc/bluemonday/LICENSE.md | 31 +
.../microcosm-cc/bluemonday/Makefile | 48 +
.../microcosm-cc/bluemonday/README.md | 418 +
.../microcosm-cc/bluemonday/SECURITY.md | 15 +
.../microcosm-cc/bluemonday/css/handlers.go | 2015 +++++
.../github.com/microcosm-cc/bluemonday/doc.go | 104 +
.../microcosm-cc/bluemonday/helpers.go | 300 +
.../microcosm-cc/bluemonday/policies.go | 253 +
.../microcosm-cc/bluemonday/policy.go | 990 +++
.../microcosm-cc/bluemonday/sanitize.go | 1089 +++
.../bluemonday/stringwriterwriter_go1.12.go | 11 +
.../bluemonday/stringwriterwriter_ltgo1.12.go | 15 +
vendor/golang.org/x/net/html/render.go | 28 +-
vendor/golang.org/x/net/html/token.go | 9 +-
vendor/golang.org/x/net/http2/Dockerfile | 51 -
vendor/golang.org/x/net/http2/Makefile | 3 -
vendor/golang.org/x/net/http2/server.go | 8 -
vendor/golang.org/x/net/http2/transport.go | 63 +-
vendor/golang.org/x/net/idna/idna9.0.0.go | 2 +-
vendor/golang.org/x/net/idna/tables13.0.0.go | 2988 ++++---
vendor/golang.org/x/net/idna/tables15.0.0.go | 5145 +++++++++++
vendor/golang.org/x/net/idna/trie.go | 21 -
vendor/golang.org/x/net/idna/trie12.0.0.go | 31 +
vendor/golang.org/x/net/idna/trie13.0.0.go | 31 +
.../x/net/publicsuffix/data/children | Bin 2876 -> 2976 bytes
.../golang.org/x/net/publicsuffix/data/nodes | Bin 48280 -> 46610 bytes
.../golang.org/x/net/publicsuffix/data/text | 2 +-
vendor/golang.org/x/net/publicsuffix/table.go | 14 +-
vendor/golang.org/x/sys/cpu/cpu.go | 5 +-
vendor/golang.org/x/sys/cpu/cpu_x86.go | 7 +
vendor/golang.org/x/sys/unix/mkerrors.sh | 5 +-
vendor/golang.org/x/sys/unix/mmap_nomremap.go | 14 +
vendor/golang.org/x/sys/unix/mremap.go | 53 +
vendor/golang.org/x/sys/unix/syscall_aix.go | 15 -
vendor/golang.org/x/sys/unix/syscall_bsd.go | 14 -
.../golang.org/x/sys/unix/syscall_darwin.go | 50 +-
vendor/golang.org/x/sys/unix/syscall_linux.go | 81 +-
.../x/sys/unix/syscall_linux_amd64.go | 2 +-
.../x/sys/unix/syscall_linux_arm64.go | 2 +-
.../x/sys/unix/syscall_linux_loong64.go | 2 +-
.../x/sys/unix/syscall_linux_mips64x.go | 2 +-
.../x/sys/unix/syscall_linux_riscv64.go | 13 +-
.../golang.org/x/sys/unix/syscall_netbsd.go | 13 +-
.../golang.org/x/sys/unix/syscall_solaris.go | 14 -
vendor/golang.org/x/sys/unix/syscall_unix.go | 11 +
.../x/sys/unix/syscall_zos_s390x.go | 14 -
vendor/golang.org/x/sys/unix/zerrors_linux.go | 43 +-
.../x/sys/unix/zerrors_linux_386.go | 9 +
.../x/sys/unix/zerrors_linux_amd64.go | 9 +
.../x/sys/unix/zerrors_linux_arm.go | 9 +
.../x/sys/unix/zerrors_linux_arm64.go | 11 +
.../x/sys/unix/zerrors_linux_loong64.go | 9 +
.../x/sys/unix/zerrors_linux_mips.go | 9 +
.../x/sys/unix/zerrors_linux_mips64.go | 9 +
.../x/sys/unix/zerrors_linux_mips64le.go | 9 +
.../x/sys/unix/zerrors_linux_mipsle.go | 9 +
.../x/sys/unix/zerrors_linux_ppc.go | 9 +
.../x/sys/unix/zerrors_linux_ppc64.go | 9 +
.../x/sys/unix/zerrors_linux_ppc64le.go | 9 +
.../x/sys/unix/zerrors_linux_riscv64.go | 9 +
.../x/sys/unix/zerrors_linux_s390x.go | 9 +
.../x/sys/unix/zerrors_linux_sparc64.go | 9 +
.../golang.org/x/sys/unix/zsyscall_linux.go | 33 +-
.../x/sys/unix/zsyscall_linux_riscv64.go | 16 +
.../x/sys/unix/zsyscall_netbsd_386.go | 11 +
.../x/sys/unix/zsyscall_netbsd_amd64.go | 11 +
.../x/sys/unix/zsyscall_netbsd_arm.go | 11 +
.../x/sys/unix/zsyscall_netbsd_arm64.go | 11 +
.../x/sys/unix/zsysnum_linux_riscv64.go | 2 +
.../x/sys/unix/zsysnum_linux_s390x.go | 1 +
vendor/golang.org/x/sys/unix/ztypes_linux.go | 55 +-
.../golang.org/x/sys/unix/ztypes_linux_386.go | 2 +
.../x/sys/unix/ztypes_linux_amd64.go | 2 +
.../golang.org/x/sys/unix/ztypes_linux_arm.go | 2 +
.../x/sys/unix/ztypes_linux_arm64.go | 2 +
.../x/sys/unix/ztypes_linux_loong64.go | 2 +
.../x/sys/unix/ztypes_linux_mips.go | 2 +
.../x/sys/unix/ztypes_linux_mips64.go | 2 +
.../x/sys/unix/ztypes_linux_mips64le.go | 2 +
.../x/sys/unix/ztypes_linux_mipsle.go | 2 +
.../golang.org/x/sys/unix/ztypes_linux_ppc.go | 2 +
.../x/sys/unix/ztypes_linux_ppc64.go | 2 +
.../x/sys/unix/ztypes_linux_ppc64le.go | 2 +
.../x/sys/unix/ztypes_linux_riscv64.go | 25 +
.../x/sys/unix/ztypes_linux_s390x.go | 2 +
.../x/sys/unix/ztypes_linux_sparc64.go | 2 +
vendor/golang.org/x/sys/windows/service.go | 4 +
.../x/sys/windows/syscall_windows.go | 15 +-
.../x/sys/windows/zsyscall_windows.go | 26 +-
.../golang.org/x/text/cases/tables13.0.0.go | 4 +-
.../golang.org/x/text/cases/tables15.0.0.go | 2528 ++++++
.../text/internal/language/compact/tables.go | 356 +-
.../x/text/internal/language/tables.go | 4686 +++++-----
vendor/golang.org/x/text/language/match.go | 2 +-
vendor/golang.org/x/text/language/tables.go | 138 +-
.../x/text/secure/precis/tables13.0.0.go | 4 +-
.../x/text/secure/precis/tables15.0.0.go | 4316 +++++++++
.../x/text/unicode/bidi/tables13.0.0.go | 4 +-
.../x/text/unicode/bidi/tables15.0.0.go | 2043 +++++
.../x/text/unicode/norm/tables13.0.0.go | 4 +-
.../x/text/unicode/norm/tables15.0.0.go | 7908 +++++++++++++++++
vendor/golang.org/x/text/unicode/norm/trie.go | 2 +-
.../golang.org/x/text/width/tables13.0.0.go | 4 +-
.../golang.org/x/text/width/tables15.0.0.go | 1368 +++
vendor/howett.net/plist/.gitignore | 16 +
vendor/howett.net/plist/.gitlab-ci.yml | 39 +
vendor/howett.net/plist/LICENSE | 58 +
vendor/howett.net/plist/README.md | 21 +
vendor/howett.net/plist/bplist.go | 26 +
vendor/howett.net/plist/bplist_generator.go | 303 +
vendor/howett.net/plist/bplist_parser.go | 353 +
vendor/howett.net/plist/decode.go | 119 +
vendor/howett.net/plist/doc.go | 5 +
vendor/howett.net/plist/encode.go | 126 +
vendor/howett.net/plist/fuzz.go | 17 +
vendor/howett.net/plist/marshal.go | 187 +
vendor/howett.net/plist/must.go | 50 +
vendor/howett.net/plist/plist.go | 83 +
vendor/howett.net/plist/plist_types.go | 172 +
vendor/howett.net/plist/text_generator.go | 228 +
vendor/howett.net/plist/text_parser.go | 580 ++
vendor/howett.net/plist/text_tables.go | 61 +
vendor/howett.net/plist/typeinfo.go | 170 +
vendor/howett.net/plist/unmarshal.go | 331 +
vendor/howett.net/plist/util.go | 25 +
vendor/howett.net/plist/xml_generator.go | 178 +
vendor/howett.net/plist/xml_parser.go | 211 +
vendor/howett.net/plist/zerocopy.go | 20 +
vendor/howett.net/plist/zerocopy_appengine.go | 7 +
vendor/modules.txt | 45 +-
233 files changed, 57758 insertions(+), 4389 deletions(-)
create mode 100644 pkg/plugin/buildin/docloader/csv.go
create mode 100644 pkg/plugin/buildin/docloader/docloader.go
create mode 100644 pkg/plugin/buildin/docloader/epub.go
create mode 100644 pkg/plugin/buildin/docloader/html.go
create mode 100644 pkg/plugin/buildin/docloader/pdf.go
create mode 100644 pkg/plugin/buildin/docloader/plaintext.go
create mode 100644 pkg/plugin/pluginapi/consts.go
rename pkg/plugin/{stub => pluginapi}/entry.go (97%)
rename pkg/plugin/{stub => pluginapi}/process.go (74%)
create mode 100644 pkg/types/friday.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/.gitattributes
create mode 100644 vendor/github.com/PuerkitoBio/goquery/.gitignore
create mode 100644 vendor/github.com/PuerkitoBio/goquery/LICENSE
create mode 100644 vendor/github.com/PuerkitoBio/goquery/README.md
create mode 100644 vendor/github.com/PuerkitoBio/goquery/array.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/doc.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/expand.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/filter.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/iteration.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/manipulation.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/property.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/query.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/traversal.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/type.go
create mode 100644 vendor/github.com/PuerkitoBio/goquery/utilities.go
create mode 100644 vendor/github.com/andybalholm/cascadia/.travis.yml
create mode 100644 vendor/github.com/andybalholm/cascadia/LICENSE
create mode 100644 vendor/github.com/andybalholm/cascadia/README.md
create mode 100644 vendor/github.com/andybalholm/cascadia/parser.go
create mode 100644 vendor/github.com/andybalholm/cascadia/pseudo_classes.go
create mode 100644 vendor/github.com/andybalholm/cascadia/selector.go
create mode 100644 vendor/github.com/andybalholm/cascadia/serialize.go
create mode 100644 vendor/github.com/andybalholm/cascadia/specificity.go
create mode 100644 vendor/github.com/aymerick/douceur/LICENSE
create mode 100644 vendor/github.com/aymerick/douceur/css/declaration.go
create mode 100644 vendor/github.com/aymerick/douceur/css/rule.go
create mode 100644 vendor/github.com/aymerick/douceur/css/stylesheet.go
create mode 100644 vendor/github.com/aymerick/douceur/parser/parser.go
create mode 100644 vendor/github.com/go-shiori/dom/.gitignore
create mode 100644 vendor/github.com/go-shiori/dom/.travis.yml
create mode 100644 vendor/github.com/go-shiori/dom/LICENSE
create mode 100644 vendor/github.com/go-shiori/dom/README.md
create mode 100644 vendor/github.com/go-shiori/dom/dom.go
create mode 100644 vendor/github.com/go-shiori/dom/parser.go
create mode 100644 vendor/github.com/go-shiori/go-readability/.gitattributes
create mode 100644 vendor/github.com/go-shiori/go-readability/.gitignore
create mode 100644 vendor/github.com/go-shiori/go-readability/.travis.yml
create mode 100644 vendor/github.com/go-shiori/go-readability/LICENSE
create mode 100644 vendor/github.com/go-shiori/go-readability/README.md
create mode 100644 vendor/github.com/go-shiori/go-readability/parser-check.go
create mode 100644 vendor/github.com/go-shiori/go-readability/parser-parse.go
create mode 100644 vendor/github.com/go-shiori/go-readability/parser.go
create mode 100644 vendor/github.com/go-shiori/go-readability/readability.go
create mode 100644 vendor/github.com/go-shiori/go-readability/utils.go
create mode 100644 vendor/github.com/gogs/chardet/2022.go
create mode 100644 vendor/github.com/gogs/chardet/AUTHORS
create mode 100644 vendor/github.com/gogs/chardet/LICENSE
create mode 100644 vendor/github.com/gogs/chardet/README.md
create mode 100644 vendor/github.com/gogs/chardet/detector.go
create mode 100644 vendor/github.com/gogs/chardet/icu-license.html
create mode 100644 vendor/github.com/gogs/chardet/multi_byte.go
create mode 100644 vendor/github.com/gogs/chardet/recognizer.go
create mode 100644 vendor/github.com/gogs/chardet/single_byte.go
create mode 100644 vendor/github.com/gogs/chardet/unicode.go
create mode 100644 vendor/github.com/gogs/chardet/utf8.go
create mode 100644 vendor/github.com/gorilla/css/LICENSE
create mode 100644 vendor/github.com/gorilla/css/scanner/doc.go
create mode 100644 vendor/github.com/gorilla/css/scanner/scanner.go
create mode 100644 vendor/github.com/hyponet/webpage-packer/LICENSE
create mode 100644 vendor/github.com/hyponet/webpage-packer/packer/clutterfree.go
create mode 100644 vendor/github.com/hyponet/webpage-packer/packer/html.go
create mode 100644 vendor/github.com/hyponet/webpage-packer/packer/interface.go
create mode 100644 vendor/github.com/hyponet/webpage-packer/packer/option.go
create mode 100644 vendor/github.com/hyponet/webpage-packer/packer/utils.go
create mode 100644 vendor/github.com/hyponet/webpage-packer/packer/webarchive.go
create mode 100644 vendor/github.com/ledongthuc/pdf/LICENSE
create mode 100644 vendor/github.com/ledongthuc/pdf/README.md
create mode 100644 vendor/github.com/ledongthuc/pdf/ascii85.go
create mode 100644 vendor/github.com/ledongthuc/pdf/lex.go
create mode 100644 vendor/github.com/ledongthuc/pdf/name.go
create mode 100644 vendor/github.com/ledongthuc/pdf/page.go
create mode 100644 vendor/github.com/ledongthuc/pdf/ps.go
create mode 100644 vendor/github.com/ledongthuc/pdf/read.go
create mode 100644 vendor/github.com/ledongthuc/pdf/text.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/.coveralls.yml
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/.editorconfig
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/.gitattributes
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/.gitignore
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/.travis.yml
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/CONTRIBUTING.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/CREDITS.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/LICENSE.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/Makefile
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/README.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/SECURITY.md
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/css/handlers.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/doc.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/helpers.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/policies.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/policy.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/sanitize.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/stringwriterwriter_go1.12.go
create mode 100644 vendor/github.com/microcosm-cc/bluemonday/stringwriterwriter_ltgo1.12.go
delete mode 100644 vendor/golang.org/x/net/http2/Dockerfile
delete mode 100644 vendor/golang.org/x/net/http2/Makefile
create mode 100644 vendor/golang.org/x/net/idna/tables15.0.0.go
create mode 100644 vendor/golang.org/x/net/idna/trie12.0.0.go
create mode 100644 vendor/golang.org/x/net/idna/trie13.0.0.go
create mode 100644 vendor/golang.org/x/sys/unix/mmap_nomremap.go
create mode 100644 vendor/golang.org/x/sys/unix/mremap.go
create mode 100644 vendor/golang.org/x/text/cases/tables15.0.0.go
create mode 100644 vendor/golang.org/x/text/secure/precis/tables15.0.0.go
create mode 100644 vendor/golang.org/x/text/unicode/bidi/tables15.0.0.go
create mode 100644 vendor/golang.org/x/text/unicode/norm/tables15.0.0.go
create mode 100644 vendor/golang.org/x/text/width/tables15.0.0.go
create mode 100644 vendor/howett.net/plist/.gitignore
create mode 100644 vendor/howett.net/plist/.gitlab-ci.yml
create mode 100644 vendor/howett.net/plist/LICENSE
create mode 100644 vendor/howett.net/plist/README.md
create mode 100644 vendor/howett.net/plist/bplist.go
create mode 100644 vendor/howett.net/plist/bplist_generator.go
create mode 100644 vendor/howett.net/plist/bplist_parser.go
create mode 100644 vendor/howett.net/plist/decode.go
create mode 100644 vendor/howett.net/plist/doc.go
create mode 100644 vendor/howett.net/plist/encode.go
create mode 100644 vendor/howett.net/plist/fuzz.go
create mode 100644 vendor/howett.net/plist/marshal.go
create mode 100644 vendor/howett.net/plist/must.go
create mode 100644 vendor/howett.net/plist/plist.go
create mode 100644 vendor/howett.net/plist/plist_types.go
create mode 100644 vendor/howett.net/plist/text_generator.go
create mode 100644 vendor/howett.net/plist/text_parser.go
create mode 100644 vendor/howett.net/plist/text_tables.go
create mode 100644 vendor/howett.net/plist/typeinfo.go
create mode 100644 vendor/howett.net/plist/unmarshal.go
create mode 100644 vendor/howett.net/plist/util.go
create mode 100644 vendor/howett.net/plist/xml_generator.go
create mode 100644 vendor/howett.net/plist/xml_parser.go
create mode 100644 vendor/howett.net/plist/zerocopy.go
create mode 100644 vendor/howett.net/plist/zerocopy_appengine.go
diff --git a/go.mod b/go.mod
index b1a10d70..12f3a560 100644
--- a/go.mod
+++ b/go.mod
@@ -16,6 +16,8 @@ require (
github.com/google/uuid v1.3.0
github.com/hanwen/go-fuse/v2 v2.3.0
github.com/hyponet/eventbus v1.0.0
+ github.com/hyponet/webpage-packer v0.0.0-20230930052235-73553a8dce4a
+ github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80
github.com/minio/minio-go/v7 v7.0.52
github.com/onsi/ginkgo v1.16.5
github.com/onsi/gomega v1.27.2
@@ -26,13 +28,15 @@ require (
github.com/tickstep/aliyunpan-api v0.1.6
github.com/tickstep/library-go v0.1.0
go.uber.org/zap v1.24.0
- golang.org/x/net v0.11.0
- golang.org/x/sys v0.9.0
+ golang.org/x/net v0.15.0
+ golang.org/x/sys v0.12.0
gopkg.in/yaml.v3 v3.0.1
gorm.io/driver/postgres v1.3.7
)
require (
+ github.com/PuerkitoBio/goquery v1.8.1 // indirect
+ github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.34 // indirect
@@ -46,6 +50,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 // indirect
+ github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/btcsuite/btcd v0.22.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
@@ -57,9 +62,13 @@ require (
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.14.1 // indirect
+ github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect
+ github.com/go-shiori/go-readability v0.0.0-20230421032831-c66949dfc0ad // indirect
github.com/goccy/go-json v0.10.2 // indirect
+ github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/go-cmp v0.5.9 // indirect
+ github.com/gorilla/css v1.0.0 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
github.com/jackc/pgconn v1.12.1 // indirect
github.com/jackc/pgio v1.0.0 // indirect
@@ -75,6 +84,7 @@ require (
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
+ github.com/microcosm-cc/bluemonday v1.0.25 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/minio/sha256-simd v1.0.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
@@ -89,11 +99,12 @@ require (
github.com/sirupsen/logrus v1.9.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
- golang.org/x/text v0.10.0 // indirect
+ golang.org/x/text v0.13.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
+ howett.net/plist v1.0.0 // indirect
modernc.org/libc v1.22.2 // indirect
modernc.org/mathutil v1.5.0 // indirect
modernc.org/memory v1.5.0 // indirect
@@ -115,6 +126,6 @@ require (
go.uber.org/atomic v1.10.0 // indirect
go.uber.org/multierr v1.9.0 // indirect
golang.org/x/arch v0.3.0 // indirect
- golang.org/x/crypto v0.10.0 // indirect
+ golang.org/x/crypto v0.13.0 // indirect
gorm.io/gorm v1.24.6
)
diff --git a/go.sum b/go.sum
index aae7d343..124d4158 100644
--- a/go.sum
+++ b/go.sum
@@ -1,9 +1,14 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc=
github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs=
+github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
+github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII=
github.com/aliyun/aliyun-oss-go-sdk v2.2.7+incompatible h1:KpbJFXwhVeuxNtBJ74MCGbIoaBok2uZvkD7QXp2+Wis=
github.com/aliyun/aliyun-oss-go-sdk v2.2.7+incompatible/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
+github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/aws/aws-sdk-go-v2 v1.18.1 h1:+tefE750oAb7ZQGzla6bLkOwfcQCEtC5y2RqoqCeqKo=
github.com/aws/aws-sdk-go-v2 v1.18.1/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 h1:dK82zF6kkPeCo8J1e+tGx4JdvDIQzj7ygIoLg8WMuGs=
@@ -40,6 +45,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 h1:XFJ2Z6sNUUcAz9poj+245DMkrHE4
github.com/aws/aws-sdk-go-v2/service/sts v1.19.2/go.mod h1:dp0yLPsLBOi++WTxzCjA/oZqi6NPIhoR+uF7GeMU9eg=
github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8=
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
+github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
+github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
@@ -122,6 +129,10 @@ github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91
github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos=
github.com/go-playground/validator/v10 v10.14.1 h1:9c50NUPC30zyuKprjL3vNZ0m5oG+jU0zvx4AqHGnv4k=
github.com/go-playground/validator/v10 v10.14.1/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
+github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c h1:wpkoddUomPfHiOziHZixGO5ZBS73cKqVzZipfrLmO1w=
+github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM=
+github.com/go-shiori/go-readability v0.0.0-20230421032831-c66949dfc0ad h1:3VP5Q8Mh165h2DHmXWFT4LJlwwvgTRlEuoe2vnsVnJ4=
+github.com/go-shiori/go-readability v0.0.0-20230421032831-c66949dfc0ad/go.mod h1:2DpZlTJO/ycxp/vsc/C11oUyveStOgIXB88SYV1lncI=
github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0 h1:p104kn46Q8WdvHunIJ9dAyjPVtrBPhSr3KT2yUst43I=
@@ -131,6 +142,8 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/gofrs/uuid v4.0.0+incompatible h1:1SD/1F5pU8p29ybwgQSwpQk+mwdRrXCYuPhW6m+TnJw=
github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
+github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs=
+github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe h1:lXe2qZdvpiX5WZkZR4hgp4KJVfY3nMkvmwbVkpv1rVY=
github.com/golang-sql/sqlexp v0.0.0-20170517235910-f1bb20e5a188 h1:+eHOFJl1BaXrQxKX+T06f78590z4qA2ZzBTqahsKSE4=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -156,11 +169,15 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
+github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
github.com/hanwen/go-fuse/v2 v2.3.0 h1:t5ivNIH2PK+zw4OBul/iJjsoG9K6kXo4nMDoBpciC8A=
github.com/hanwen/go-fuse/v2 v2.3.0/go.mod h1:xKwi1cF7nXAOBCXujD5ie0ZKsxc8GGSA1rlMJc+8IJs=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/hyponet/eventbus v1.0.0 h1:Cl1v4Ge1/ILn/z4nBhxu1cTny8joRPDj3pqRudlbO+w=
github.com/hyponet/eventbus v1.0.0/go.mod h1:5XPvonkyxwwNSMEqnpuSh1NlW3KZKpRr9DNKkZBBuyk=
+github.com/hyponet/webpage-packer v0.0.0-20230930052235-73553a8dce4a h1:h/MjGu9KXGqsIrCH5BEvvwTpMY0ZpuWJhkQi2LNPqGc=
+github.com/hyponet/webpage-packer v0.0.0-20230930052235-73553a8dce4a/go.mod h1:2qcy+SgeIQHRG6grhK9oMWFk5Fnh65U5qy47ij5Sw3A=
github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
@@ -247,6 +264,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4=
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
+github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
+github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
@@ -268,6 +287,8 @@ github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m
github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
+github.com/microcosm-cc/bluemonday v1.0.25 h1:4NEwSfiJ+Wva0VxN5B8OwMicaJvD8r9tlJWm9rtloEg=
+github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
github.com/minio/minio-go/v7 v7.0.52 h1:8XhG36F6oKQUDDSuz6dY3rioMzovKjW40W6ANuN0Dps=
@@ -331,6 +352,7 @@ github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThC
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
+github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
@@ -375,6 +397,7 @@ github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
@@ -411,12 +434,14 @@ golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWP
golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
-golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
+golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180719180050-a680a1efc54d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
@@ -426,13 +451,20 @@ golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.11.0 h1:Gi2tvZIJyBtO9SDr1q9h5hEQCp/4L2RQ+ar0qjx2oNU=
-golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
+golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8=
+golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -451,26 +483,34 @@ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
-golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
-golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -483,7 +523,9 @@ golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtn
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -512,6 +554,7 @@ gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
+gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
@@ -531,6 +574,8 @@ gorm.io/gorm v1.23.4/go.mod h1:l2lP/RyAtc1ynaTjFksBde/O8v9oOGIApu2/xRitmZk=
gorm.io/gorm v1.24.6 h1:wy98aq9oFEetsc4CAbKD2SoBCdMzsbSIvSUUFJuHi5s=
gorm.io/gorm v1.24.6/go.mod h1:L4uxeKpfBml98NYqVqwAdmV1a2nBtAec/cf3fpucW/k=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
+howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
+howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
modernc.org/libc v1.22.2 h1:4U7v51GyhlWqQmwCHj28Rdq2Yzwk55ovjFrdPjs8Hb0=
modernc.org/libc v1.22.2/go.mod h1:uvQavJ1pZ0hIoC/jfqNoMLURIMhKzINIWypNM17puug=
modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=
diff --git a/pkg/dentry/group.go b/pkg/dentry/group.go
index 4e322d72..e67e9712 100644
--- a/pkg/dentry/group.go
+++ b/pkg/dentry/group.go
@@ -21,7 +21,7 @@ import (
"fmt"
"github.com/basenana/nanafs/pkg/metastore"
"github.com/basenana/nanafs/pkg/plugin"
- "github.com/basenana/nanafs/pkg/plugin/stub"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
"github.com/basenana/nanafs/pkg/types"
"github.com/basenana/nanafs/utils/logger"
"path"
@@ -243,7 +243,7 @@ func (e *extGroup) FindEntry(ctx context.Context, name string) (*types.Metadata,
}
func (e *extGroup) CreateEntry(ctx context.Context, attr EntryAttr) (*types.Metadata, error) {
- mirrorEn, err := e.mirror.CreateEntry(ctx, stub.EntryAttr{
+ mirrorEn, err := e.mirror.CreateEntry(ctx, pluginapi.EntryAttr{
Name: attr.Name,
Kind: attr.Kind,
})
@@ -325,7 +325,7 @@ func (e *extGroup) ListChildren(ctx context.Context) ([]*types.Metadata, error)
}
recordChildMap := make(map[string]*types.Metadata)
- actualChildMap := make(map[string]*stub.Entry)
+ actualChildMap := make(map[string]*pluginapi.Entry)
for i := range recordChild {
recordChildMap[recordChild[i].Name] = recordChild[i]
}
@@ -355,7 +355,7 @@ func (e *extGroup) ListChildren(ctx context.Context) ([]*types.Metadata, error)
return result, nil
}
-func (e *extGroup) syncEntry(ctx context.Context, mirrored *stub.Entry, crt *types.Metadata) (en *types.Metadata, err error) {
+func (e *extGroup) syncEntry(ctx context.Context, mirrored *pluginapi.Entry, crt *types.Metadata) (en *types.Metadata, err error) {
grp, err := e.stdGroup.cacheStore.getEntry(ctx, e.stdGroup.entryID)
if err != nil {
return nil, err
diff --git a/pkg/dentry/group_test.go b/pkg/dentry/group_test.go
index 44aa98ca..f9bf7474 100644
--- a/pkg/dentry/group_test.go
+++ b/pkg/dentry/group_test.go
@@ -19,7 +19,7 @@ package dentry
import (
"context"
"github.com/basenana/nanafs/pkg/plugin"
- "github.com/basenana/nanafs/pkg/plugin/stub"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
"github.com/basenana/nanafs/pkg/types"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
@@ -314,7 +314,7 @@ var _ = Describe("TestExtGroupEntry", func() {
Expect(len(need)).Should(Equal(0))
})
It("insert sync_file1.yaml to memfs should be succeed", func() {
- _, err = memFS.CreateEntry(context.TODO(), stub.EntryAttr{
+ _, err = memFS.CreateEntry(context.TODO(), pluginapi.EntryAttr{
Name: "sync_file1.yaml",
Kind: types.RawKind,
})
@@ -335,7 +335,7 @@ var _ = Describe("TestExtGroupEntry", func() {
Expect(len(need)).Should(Equal(0))
})
It("insert sync_file2.yaml to memfs should be succeed", func() {
- _, err = memFS.CreateEntry(context.TODO(), stub.EntryAttr{
+ _, err = memFS.CreateEntry(context.TODO(), pluginapi.EntryAttr{
Name: "sync_file2.yaml",
Kind: types.RawKind,
})
@@ -355,7 +355,7 @@ var _ = Describe("TestExtGroupEntry", func() {
Expect(len(need)).Should(Equal(0))
})
It("delete sync_file2.yaml should be succeed", func() {
- err = memFS.RemoveEntry(context.TODO(), &stub.Entry{
+ err = memFS.RemoveEntry(context.TODO(), &pluginapi.Entry{
Name: "sync_file2.yaml",
Kind: types.RawKind,
})
diff --git a/pkg/plugin/buildin/docloader/csv.go b/pkg/plugin/buildin/docloader/csv.go
new file mode 100644
index 00000000..b0297d50
--- /dev/null
+++ b/pkg/plugin/buildin/docloader/csv.go
@@ -0,0 +1,80 @@
+/*
+ Copyright 2023 NanaFS Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package docloader
+
+import (
+ "context"
+ "encoding/csv"
+ "errors"
+ "fmt"
+ "github.com/basenana/nanafs/pkg/types"
+ "io"
+ "os"
+ "strings"
+)
+
+const (
+ csvLoader = "csv"
+)
+
+type CSV struct {
+ docPath string
+}
+
+func NewCSV(docPath string, option map[string]string) Parser {
+ return CSV{docPath: docPath}
+}
+
+func (c CSV) Load(_ context.Context) (result []types.FDocument, err error) {
+ f, err := os.Open(c.docPath)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ var header []string
+ var rown int
+
+ rd := csv.NewReader(f)
+ for {
+ row, err := rd.Read()
+ if errors.Is(err, io.EOF) {
+ break
+ }
+ if err != nil {
+ return nil, err
+ }
+ if len(header) == 0 {
+ header = append(header, row...)
+ continue
+ }
+
+ var content []string
+ for i, value := range row {
+ line := fmt.Sprintf("%s: %s", header[i], value)
+ content = append(content, line)
+ }
+
+ rown++
+ result = append(result, types.FDocument{
+ Content: strings.Join(content, "\n"),
+ Metadata: map[string]any{"type": csvLoader, "row": rown},
+ })
+ }
+
+ return
+}
diff --git a/pkg/plugin/buildin/docloader/docloader.go b/pkg/plugin/buildin/docloader/docloader.go
new file mode 100644
index 00000000..f8403e7a
--- /dev/null
+++ b/pkg/plugin/buildin/docloader/docloader.go
@@ -0,0 +1,102 @@
+/*
+ Copyright 2023 NanaFS Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package docloader
+
+import (
+ "context"
+ "fmt"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
+ "github.com/basenana/nanafs/pkg/types"
+ "os"
+ "path/filepath"
+)
+
+const (
+ PluginName = "docloader"
+ PluginVersion = "1.0"
+)
+
+type DocLoader struct{}
+
+func (d DocLoader) Name() string {
+ return PluginName
+}
+
+func (d DocLoader) Type() types.PluginType {
+ return types.TypeProcess
+}
+
+func (d DocLoader) Version() string {
+ return PluginVersion
+}
+
+func (d DocLoader) Run(ctx context.Context, request *pluginapi.Request, pluginParams map[string]string) (*pluginapi.Response, error) {
+	entryPath, _ := request.Parameter[pluginapi.ResEntryPathKey].(string)
+ if entryPath == "" {
+ resp := pluginapi.NewFailedResponse("entry_path is empty")
+ return resp, nil
+ }
+
+ _, err := os.Stat(entryPath)
+ if err != nil {
+ resp := pluginapi.NewFailedResponse(fmt.Sprintf("stat entry file %s failed: %s", entryPath, err))
+ return resp, nil
+ }
+
+ fileExt := filepath.Ext(entryPath)
+ var (
+ p Parser
+ parseOption = map[string]string{}
+ )
+
+ switch fileExt {
+ case ".pdf":
+ p = buildInLoaders[pdfParser](entryPath, parseOption)
+ case ".txt":
+ p = buildInLoaders[textParser](entryPath, parseOption)
+ case ".html", ".htm":
+ p = buildInLoaders[htmlParser](entryPath, parseOption)
+ case ".webarchive":
+ p = buildInLoaders[webArchiveParser](entryPath, parseOption)
+ default:
+ resp := pluginapi.NewFailedResponse(fmt.Sprintf("load %s file unsupported", fileExt))
+ return resp, nil
+ }
+
+ documents, err := p.Load(ctx)
+ if err != nil {
+ resp := pluginapi.NewFailedResponse(fmt.Sprintf("load file %s failed: %s", entryPath, err))
+ return resp, nil
+ }
+
+ return pluginapi.NewResponseWithResult(map[string]any{pluginapi.ResEntryDocumentsKey: documents}), nil
+}
+
+type Parser interface {
+ Load(ctx context.Context) (result []types.FDocument, err error)
+}
+
+type parserBuilder func(docPath string, docOption map[string]string) Parser
+
+var (
+ buildInLoaders = map[string]parserBuilder{
+ textParser: NewText,
+ pdfParser: NewPDF,
+ htmlParser: NewHTML,
+ webArchiveParser: NewHTML,
+ }
+)
diff --git a/pkg/plugin/buildin/docloader/epub.go b/pkg/plugin/buildin/docloader/epub.go
new file mode 100644
index 00000000..267f1662
--- /dev/null
+++ b/pkg/plugin/buildin/docloader/epub.go
@@ -0,0 +1,17 @@
+/*
+ Copyright 2023 NanaFS Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package docloader
diff --git a/pkg/plugin/buildin/docloader/html.go b/pkg/plugin/buildin/docloader/html.go
new file mode 100644
index 00000000..e50812c2
--- /dev/null
+++ b/pkg/plugin/buildin/docloader/html.go
@@ -0,0 +1,66 @@
+/*
+ Copyright 2023 NanaFS Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package docloader
+
+import (
+ "context"
+ "github.com/basenana/nanafs/pkg/types"
+ "github.com/hyponet/webpage-packer/packer"
+ "strings"
+)
+
+const (
+ htmlParser = "html"
+ webArchiveParser = "webarchive"
+)
+
+type HTML struct {
+ docPath string
+}
+
+func NewHTML(docPath string, option map[string]string) Parser {
+ return HTML{docPath: docPath}
+}
+
+func (h HTML) Load(ctx context.Context) (result []types.FDocument, err error) {
+ var (
+ p packer.Packer
+ docType = "html"
+ )
+ switch {
+ case strings.HasSuffix(h.docPath, ".webarchive"):
+ p = packer.NewWebArchivePacker()
+ docType = "webarchive"
+
+	default:
+		// .html/.htm and anything else routed here: use the HTML packer
+		p = packer.NewHtmlPacker()
+ }
+
+ content, err := p.ReadContent(ctx, packer.Option{
+ FilePath: h.docPath,
+ ClutterFree: true,
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ return []types.FDocument{{
+ Content: content,
+ Metadata: map[string]any{"type": docType},
+ }}, nil
+}
diff --git a/pkg/plugin/buildin/docloader/pdf.go b/pkg/plugin/buildin/docloader/pdf.go
new file mode 100644
index 00000000..448ae547
--- /dev/null
+++ b/pkg/plugin/buildin/docloader/pdf.go
@@ -0,0 +1,107 @@
+/*
+ Copyright 2023 NanaFS Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package docloader
+
+import (
+ "context"
+ "github.com/basenana/nanafs/pkg/types"
+ "github.com/ledongthuc/pdf"
+ "os"
+)
+
+const (
+ pdfParser = "pdf"
+)
+
+type PDF struct {
+ docPath string
+ password string
+}
+
+func NewPDF(docPath string, option map[string]string) Parser {
+ return newPDFWithPassword(docPath, option["password"])
+}
+
+func newPDFWithPassword(docPath, pass string) Parser {
+ return &PDF{docPath: docPath, password: pass}
+}
+
+func (p *PDF) Load(_ context.Context) ([]types.FDocument, error) {
+ fInfo, err := os.Stat(p.docPath)
+ if err != nil {
+ return nil, err
+ }
+
+ f, err := os.Open(p.docPath)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ var reader *pdf.Reader
+ if p.password != "" {
+ reader, err = pdf.NewReaderEncrypted(f, fInfo.Size(), p.getAndCleanPassword)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ reader, err = pdf.NewReader(f, fInfo.Size())
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ var (
+ numPages = reader.NumPage()
+ result = make([]types.FDocument, 0)
+ )
+
+ fonts := make(map[string]*pdf.Font)
+ for i := 1; i < numPages+1; i++ {
+ page := reader.Page(i)
+ for _, name := range page.Fonts() {
+ if _, ok := fonts[name]; !ok {
+ f := page.Font(name)
+ fonts[name] = &f
+ }
+ }
+ text, err := page.GetPlainText(fonts)
+ if err != nil {
+ return nil, err
+ }
+
+ result = append(result, types.FDocument{
+ Content: text,
+ Metadata: map[string]any{
+ "type": "pdf",
+ "page": i,
+ "total_pages": numPages,
+ },
+ })
+ }
+
+ return result, nil
+}
+
+func (p *PDF) getAndCleanPassword() string {
+ pass := p.password
+ if pass != "" {
+ // set password empty to stop retry
+ p.password = ""
+ }
+ return pass
+}
diff --git a/pkg/plugin/buildin/docloader/plaintext.go b/pkg/plugin/buildin/docloader/plaintext.go
new file mode 100644
index 00000000..a66d68c4
--- /dev/null
+++ b/pkg/plugin/buildin/docloader/plaintext.go
@@ -0,0 +1,56 @@
+/*
+ Copyright 2023 NanaFS Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package docloader
+
+import (
+ "bytes"
+ "context"
+ "github.com/basenana/nanafs/pkg/types"
+ "io"
+ "os"
+)
+
+const (
+ textParser = "text"
+)
+
+type Text struct {
+ docPath string
+}
+
+func NewText(docPath string, option map[string]string) Parser { return Text{docPath: docPath} }
+
+func (l Text) Load(_ context.Context) ([]types.FDocument, error) {
+ f, err := os.Open(l.docPath)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ buf := new(bytes.Buffer)
+ _, err = io.Copy(buf, f)
+ if err != nil {
+ return nil, err
+ }
+
+ return []types.FDocument{
+ {
+ Content: buf.String(),
+			Metadata: map[string]any{"type": textParser},
+ },
+ }, nil
+}
diff --git a/pkg/plugin/buildin/rss.go b/pkg/plugin/buildin/rss.go
index 3cd9d957..130329ab 100644
--- a/pkg/plugin/buildin/rss.go
+++ b/pkg/plugin/buildin/rss.go
@@ -19,7 +19,7 @@ package buildin
import (
"context"
"github.com/basenana/nanafs/pkg/metastore"
- "github.com/basenana/nanafs/pkg/plugin/stub"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
"github.com/basenana/nanafs/pkg/types"
"go.uber.org/zap"
)
@@ -76,7 +76,7 @@ func (r *RssSourcePlugin) listRssSources(ctx context.Context) ([]rssSource, erro
return result, nil
}
-func (r *RssSourcePlugin) Run(ctx context.Context, request *stub.Request, params map[string]string) (*stub.Response, error) {
+func (r *RssSourcePlugin) Run(ctx context.Context, request *pluginapi.Request, pluginParams map[string]string) (*pluginapi.Response, error) {
rssSourceList, err := r.listRssSources(ctx)
if err != nil {
r.logger.Errorw("list rss source failed", "err", err)
@@ -85,10 +85,10 @@ func (r *RssSourcePlugin) Run(ctx context.Context, request *stub.Request, params
for i := range rssSourceList {
source := rssSourceList[i]
- r.syncRssSource(ctx, source, params)
+ r.syncRssSource(ctx, source, pluginParams)
}
- resp := &stub.Response{
+ resp := &pluginapi.Response{
IsSucceed: true,
}
return resp, nil
diff --git a/pkg/plugin/mirror.go b/pkg/plugin/mirror.go
index c34fb143..3e22674b 100644
--- a/pkg/plugin/mirror.go
+++ b/pkg/plugin/mirror.go
@@ -19,7 +19,7 @@ package plugin
import (
"context"
"fmt"
- "github.com/basenana/nanafs/pkg/plugin/stub"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
"github.com/basenana/nanafs/pkg/types"
"github.com/basenana/nanafs/utils"
"io"
@@ -31,11 +31,11 @@ type MirrorPlugin interface {
Plugin
IsGroup(ctx context.Context) (bool, error)
- FindEntry(ctx context.Context, name string) (*stub.Entry, error)
- CreateEntry(ctx context.Context, attr stub.EntryAttr) (*stub.Entry, error)
- UpdateEntry(ctx context.Context, en *stub.Entry) error
- RemoveEntry(ctx context.Context, en *stub.Entry) error
- ListChildren(ctx context.Context) ([]*stub.Entry, error)
+ FindEntry(ctx context.Context, name string) (*pluginapi.Entry, error)
+ CreateEntry(ctx context.Context, attr pluginapi.EntryAttr) (*pluginapi.Entry, error)
+ UpdateEntry(ctx context.Context, en *pluginapi.Entry) error
+ RemoveEntry(ctx context.Context, en *pluginapi.Entry) error
+ ListChildren(ctx context.Context) ([]*pluginapi.Entry, error)
WriteAt(ctx context.Context, data []byte, off int64) (int64, error)
ReadAt(ctx context.Context, dest []byte, off int64) (int64, error)
@@ -104,23 +104,23 @@ func (d *MemFSPlugin) IsGroup(ctx context.Context) (bool, error) {
return en.IsGroup, nil
}
-func (d *MemFSPlugin) FindEntry(ctx context.Context, name string) (*stub.Entry, error) {
+func (d *MemFSPlugin) FindEntry(ctx context.Context, name string) (*pluginapi.Entry, error) {
return d.fs.FindEntry(d.path, name)
}
-func (d *MemFSPlugin) CreateEntry(ctx context.Context, attr stub.EntryAttr) (*stub.Entry, error) {
+func (d *MemFSPlugin) CreateEntry(ctx context.Context, attr pluginapi.EntryAttr) (*pluginapi.Entry, error) {
return d.fs.CreateEntry(d.path, attr)
}
-func (d *MemFSPlugin) UpdateEntry(ctx context.Context, en *stub.Entry) error {
+func (d *MemFSPlugin) UpdateEntry(ctx context.Context, en *pluginapi.Entry) error {
return d.fs.UpdateEntry(d.path, en)
}
-func (d *MemFSPlugin) RemoveEntry(ctx context.Context, en *stub.Entry) error {
+func (d *MemFSPlugin) RemoveEntry(ctx context.Context, en *pluginapi.Entry) error {
return d.fs.RemoveEntry(d.path, en)
}
-func (d *MemFSPlugin) ListChildren(ctx context.Context) ([]*stub.Entry, error) {
+func (d *MemFSPlugin) ListChildren(ctx context.Context) ([]*pluginapi.Entry, error) {
return d.fs.ListChildren(d.path)
}
@@ -145,13 +145,13 @@ func (d *MemFSPlugin) Close(ctx context.Context) error {
}
type MemFS struct {
- entries map[string]*stub.Entry
+ entries map[string]*pluginapi.Entry
files map[string]*memFile
groups map[string][]string
mux sync.Mutex
}
-func (m *MemFS) GetEntry(enPath string) (*stub.Entry, error) {
+func (m *MemFS) GetEntry(enPath string) (*pluginapi.Entry, error) {
m.mux.Lock()
defer m.mux.Unlock()
@@ -162,7 +162,7 @@ func (m *MemFS) GetEntry(enPath string) (*stub.Entry, error) {
return en, nil
}
-func (m *MemFS) FindEntry(parentPath string, name string) (*stub.Entry, error) {
+func (m *MemFS) FindEntry(parentPath string, name string) (*pluginapi.Entry, error) {
m.mux.Lock()
defer m.mux.Unlock()
@@ -173,7 +173,7 @@ func (m *MemFS) FindEntry(parentPath string, name string) (*stub.Entry, error) {
return en, nil
}
-func (m *MemFS) CreateEntry(parentPath string, attr stub.EntryAttr) (*stub.Entry, error) {
+func (m *MemFS) CreateEntry(parentPath string, attr pluginapi.EntryAttr) (*pluginapi.Entry, error) {
m.mux.Lock()
defer m.mux.Unlock()
@@ -195,7 +195,7 @@ func (m *MemFS) CreateEntry(parentPath string, attr stub.EntryAttr) (*stub.Entry
child = append(child, attr.Name)
m.groups[parentPath] = child
- en := &stub.Entry{
+ en := &pluginapi.Entry{
Name: attr.Name,
Kind: attr.Kind,
IsGroup: types.IsGroup(attr.Kind),
@@ -210,7 +210,7 @@ func (m *MemFS) CreateEntry(parentPath string, attr stub.EntryAttr) (*stub.Entry
return en, nil
}
-func (m *MemFS) UpdateEntry(parentPath string, en *stub.Entry) error {
+func (m *MemFS) UpdateEntry(parentPath string, en *pluginapi.Entry) error {
m.mux.Lock()
defer m.mux.Unlock()
@@ -225,7 +225,7 @@ func (m *MemFS) UpdateEntry(parentPath string, en *stub.Entry) error {
return nil
}
-func (m *MemFS) RemoveEntry(parentPath string, en *stub.Entry) error {
+func (m *MemFS) RemoveEntry(parentPath string, en *pluginapi.Entry) error {
m.mux.Lock()
defer m.mux.Unlock()
@@ -262,7 +262,7 @@ func (m *MemFS) RemoveEntry(parentPath string, en *stub.Entry) error {
return nil
}
-func (m *MemFS) ListChildren(enPath string) ([]*stub.Entry, error) {
+func (m *MemFS) ListChildren(enPath string) ([]*pluginapi.Entry, error) {
m.mux.Lock()
defer m.mux.Unlock()
@@ -276,7 +276,7 @@ func (m *MemFS) ListChildren(enPath string) ([]*stub.Entry, error) {
}
childNames := m.groups[enPath]
- result := make([]*stub.Entry, len(childNames))
+ result := make([]*pluginapi.Entry, len(childNames))
for i, chName := range childNames {
result[i] = m.entries[path.Join(enPath, chName)]
}
@@ -322,7 +322,7 @@ func (m *MemFS) Trunc(filePath string) error {
func NewMemFS() *MemFS {
fs := &MemFS{
- entries: map[string]*stub.Entry{"/": {Name: ".", Kind: types.ExternalGroupKind, Size: 0, IsGroup: true}},
+ entries: map[string]*pluginapi.Entry{"/": {Name: ".", Kind: types.ExternalGroupKind, Size: 0, IsGroup: true}},
groups: map[string][]string{"/": {}},
files: map[string]*memFile{},
}
@@ -334,11 +334,11 @@ const (
)
type memFile struct {
- *stub.Entry
+ *pluginapi.Entry
data []byte
}
-func newMemFile(entry *stub.Entry) *memFile {
+func newMemFile(entry *pluginapi.Entry) *memFile {
return &memFile{
Entry: entry,
data: utils.NewMemoryBlock(memFileMaxSize / 16), // 1M
diff --git a/pkg/plugin/pluginapi/consts.go b/pkg/plugin/pluginapi/consts.go
new file mode 100644
index 00000000..4b94c6ad
--- /dev/null
+++ b/pkg/plugin/pluginapi/consts.go
@@ -0,0 +1,28 @@
+/*
+ Copyright 2023 NanaFS Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package pluginapi
+
+const (
+ ResEntryIdKey = "nanafs.workflow.entry_id"
+ ResEntryPathKey = "nanafs.workflow.entry_path"
+ ResEntryDocumentsKey = "nanafs.workflow.entry_documents"
+ ResCollectManifest = "nanafs.workflow.collect_manifest"
+ ResPluginName = "nanafs.workflow.plugin_name"
+ ResPluginVersion = "nanafs.workflow.plugin_version"
+ ResPluginType = "nanafs.workflow.plugin_type"
+ ResPluginAction = "nanafs.workflow.plugin_action"
+)
diff --git a/pkg/plugin/stub/entry.go b/pkg/plugin/pluginapi/entry.go
similarity index 97%
rename from pkg/plugin/stub/entry.go
rename to pkg/plugin/pluginapi/entry.go
index 1adc5bee..709d11bb 100644
--- a/pkg/plugin/stub/entry.go
+++ b/pkg/plugin/pluginapi/entry.go
@@ -14,7 +14,7 @@
limitations under the License.
*/
-package stub
+package pluginapi
import (
"github.com/basenana/nanafs/pkg/types"
diff --git a/pkg/plugin/stub/process.go b/pkg/plugin/pluginapi/process.go
similarity index 74%
rename from pkg/plugin/stub/process.go
rename to pkg/plugin/pluginapi/process.go
index 4649d2da..70ac2f4b 100644
--- a/pkg/plugin/stub/process.go
+++ b/pkg/plugin/pluginapi/process.go
@@ -14,14 +14,14 @@
limitations under the License.
*/
-package stub
+package pluginapi
type Request struct {
Action string
WorkPath string
EntryId int64
EntryPath string
- Parameter map[string]string
+ Parameter map[string]any
}
func NewRequest() *Request {
@@ -31,9 +31,17 @@ func NewRequest() *Request {
type Response struct {
IsSucceed bool
Message string
- Entries []Entry
+ Results map[string]any
}
func NewResponse() *Response {
return &Response{}
}
+
+func NewFailedResponse(msg string) *Response {
+ return &Response{IsSucceed: false, Message: msg}
+}
+
+func NewResponseWithResult(result map[string]any) *Response {
+ return &Response{IsSucceed: true, Results: result}
+}
diff --git a/pkg/plugin/process.go b/pkg/plugin/process.go
index 25c4290f..d99e6e69 100644
--- a/pkg/plugin/process.go
+++ b/pkg/plugin/process.go
@@ -19,17 +19,24 @@ package plugin
import (
"context"
"fmt"
- "github.com/basenana/nanafs/pkg/plugin/stub"
+ "github.com/basenana/nanafs/pkg/plugin/buildin/docloader"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
"github.com/basenana/nanafs/pkg/types"
+ "github.com/basenana/nanafs/utils"
"time"
)
type ProcessPlugin interface {
Plugin
- Run(ctx context.Context, request *stub.Request, params map[string]string) (*stub.Response, error)
+ Run(ctx context.Context, request *pluginapi.Request, pluginParams map[string]string) (*pluginapi.Response, error)
}
-func Call(ctx context.Context, ps types.PlugScope, req *stub.Request) (resp *stub.Response, err error) {
+func Call(ctx context.Context, ps types.PlugScope, req *pluginapi.Request) (resp *pluginapi.Response, err error) {
+ defer func() {
+ if rErr := utils.Recover(); rErr != nil {
+ err = rErr
+ }
+ }()
var plugin Plugin
plugin, err = BuildPlugin(ctx, ps)
if err != nil {
@@ -64,7 +71,7 @@ func (d *DelayProcessPlugin) Version() string {
return delayPluginVersion
}
-func (d *DelayProcessPlugin) Run(ctx context.Context, request *stub.Request, params map[string]string) (*stub.Response, error) {
+func (d *DelayProcessPlugin) Run(ctx context.Context, request *pluginapi.Request, pluginParams map[string]string) (*pluginapi.Response, error) {
var (
until time.Time
nowTime = time.Now()
@@ -72,7 +79,7 @@ func (d *DelayProcessPlugin) Run(ctx context.Context, request *stub.Request, par
switch request.Action {
case "delay":
- delayDurationStr := params["delay"]
+ delayDurationStr := pluginParams["delay"]
duration, err := time.ParseDuration(delayDurationStr)
if err != nil {
return nil, fmt.Errorf("parse delay duration [%s] failed: %s", delayDurationStr, err)
@@ -81,14 +88,14 @@ func (d *DelayProcessPlugin) Run(ctx context.Context, request *stub.Request, par
case "until":
var err error
- untilStr := params["until"]
+ untilStr := pluginParams["until"]
-			until, err = time.Parse(untilStr, time.RFC3339)
+			until, err = time.Parse(time.RFC3339, untilStr)
if err != nil {
return nil, fmt.Errorf("parse delay until [%s] failed: %s", untilStr, err)
}
default:
- resp := stub.NewResponse()
+ resp := pluginapi.NewResponse()
resp.Message = fmt.Sprintf("unknown action: %s", request.Action)
return resp, nil
}
@@ -98,16 +105,16 @@ func (d *DelayProcessPlugin) Run(ctx context.Context, request *stub.Request, par
defer timer.Stop()
select {
case <-timer.C:
- return &stub.Response{IsSucceed: true}, nil
+ return &pluginapi.Response{IsSucceed: true}, nil
case <-ctx.Done():
- return &stub.Response{IsSucceed: false, Message: ctx.Err().Error()}, nil
+ return &pluginapi.Response{IsSucceed: false, Message: ctx.Err().Error()}, nil
}
}
- return &stub.Response{IsSucceed: true}, nil
+ return &pluginapi.Response{IsSucceed: true}, nil
}
-func registerDelayPlugin(r *registry) {
+func registerBuildInProcessPlugin(r *registry) {
r.Register(
delayPluginName,
types.PluginSpec{Name: delayPluginName, Version: delayPluginVersion, Type: types.TypeProcess, Parameters: map[string]string{}},
@@ -115,4 +122,12 @@ func registerDelayPlugin(r *registry) {
return &DelayProcessPlugin{}, nil
},
)
+
+ r.Register(
+ docloader.PluginName,
+ types.PluginSpec{Name: docloader.PluginName, Version: docloader.PluginVersion, Type: types.TypeProcess, Parameters: map[string]string{}},
+ func(ctx context.Context, spec types.PluginSpec, scope types.PlugScope) (Plugin, error) {
+ return &docloader.DocLoader{}, nil
+ },
+ )
}
diff --git a/pkg/plugin/registry.go b/pkg/plugin/registry.go
index d4945a1d..d26b93d6 100644
--- a/pkg/plugin/registry.go
+++ b/pkg/plugin/registry.go
@@ -70,7 +70,7 @@ func Init(cfg *config.Plugin, recorderGetter metastore.PluginRecorderGetter) err
}
// register build-in plugins
- registerDelayPlugin(r)
+ registerBuildInProcessPlugin(r)
registerMemfsPlugin(r)
register3BodyPlugin(r)
diff --git a/pkg/plugin/source.go b/pkg/plugin/source.go
index 8cc327c4..f1565e8a 100644
--- a/pkg/plugin/source.go
+++ b/pkg/plugin/source.go
@@ -20,7 +20,7 @@ import (
"bytes"
"context"
"fmt"
- "github.com/basenana/nanafs/pkg/plugin/stub"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
"github.com/basenana/nanafs/pkg/types"
"io"
"io/ioutil"
@@ -31,8 +31,8 @@ import (
type SourcePlugin interface {
Plugin
- Fresh(ctx context.Context, opt stub.FreshOption) ([]*stub.Entry, error)
- Open(ctx context.Context, entry *stub.Entry) (io.ReadCloser, error)
+ Fresh(ctx context.Context, opt pluginapi.FreshOption) ([]*pluginapi.Entry, error)
+ Open(ctx context.Context, entry *pluginapi.Entry) (io.ReadCloser, error)
}
const (
@@ -56,14 +56,14 @@ func (d *ThreeBodyPlugin) Version() string {
return the3BodyPluginVersion
}
-func (d *ThreeBodyPlugin) Fresh(ctx context.Context, opt stub.FreshOption) ([]*stub.Entry, error) {
+func (d *ThreeBodyPlugin) Fresh(ctx context.Context, opt pluginapi.FreshOption) ([]*pluginapi.Entry, error) {
crtAt := time.Now().Unix()
- result := make([]*stub.Entry, 0)
+ result := make([]*pluginapi.Entry, 0)
for i := crtAt - 60; i < crtAt; i += 60 {
if i <= opt.LastFreshAt.Unix() {
continue
}
- result = append(result, &stub.Entry{
+ result = append(result, &pluginapi.Entry{
Name: fmt.Sprintf("3_body_%d.txt", i),
Kind: types.RawKind,
IsGroup: false,
@@ -72,7 +72,7 @@ func (d *ThreeBodyPlugin) Fresh(ctx context.Context, opt stub.FreshOption) ([]*s
return result, nil
}
-func (d *ThreeBodyPlugin) Open(ctx context.Context, entry *stub.Entry) (io.ReadCloser, error) {
+func (d *ThreeBodyPlugin) Open(ctx context.Context, entry *pluginapi.Entry) (io.ReadCloser, error) {
fileNameParts := strings.Split(entry.Name, "_")
sendAtStr := fileNameParts[len(fileNameParts)-1]
diff --git a/pkg/types/friday.go b/pkg/types/friday.go
new file mode 100644
index 00000000..7094a55f
--- /dev/null
+++ b/pkg/types/friday.go
@@ -0,0 +1,22 @@
+/*
+ Copyright 2023 NanaFS Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package types
+
+type FDocument struct {
+ Content string
+ Metadata map[string]any
+}
diff --git a/pkg/workflow/exec/consts.go b/pkg/workflow/exec/consts.go
index 414b2ae6..2b01ad0f 100644
--- a/pkg/workflow/exec/consts.go
+++ b/pkg/workflow/exec/consts.go
@@ -21,12 +21,3 @@ const (
OpEntryCollect = "entryCollect"
OpPluginCall = "pluginCall"
)
-
-const (
- paramEntryIdKey = "nanafs.workflow.entry_id"
- paramEntryPathKey = "nanafs.workflow.entry_path"
- paramPluginName = "nanafs.workflow.plugin_name"
- paramPluginVersion = "nanafs.workflow.plugin_version"
- paramPluginType = "nanafs.workflow.plugin_type"
- paramPluginAction = "nanafs.workflow.plugin_action"
-)
diff --git a/pkg/workflow/exec/executor.go b/pkg/workflow/exec/executor.go
index a75a2877..608fca07 100644
--- a/pkg/workflow/exec/executor.go
+++ b/pkg/workflow/exec/executor.go
@@ -22,11 +22,14 @@ import (
"github.com/basenana/nanafs/config"
"github.com/basenana/nanafs/pkg/dentry"
"github.com/basenana/nanafs/pkg/plugin"
- "github.com/basenana/nanafs/pkg/plugin/stub"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
"github.com/basenana/nanafs/pkg/types"
"github.com/basenana/nanafs/pkg/workflow/jobrun"
"github.com/basenana/nanafs/utils/logger"
"go.uber.org/zap"
+ "os"
+ "path"
+ "sync"
)
const (
@@ -36,8 +39,11 @@ const (
func RegisterOperators(entryMgr dentry.Manager, cfg LocalConfig) error {
jobrun.RegisterExecutorBuilder(localExecName, func(job *types.WorkflowJob) jobrun.Executor {
return &localExecutor{
- job: job, entryMgr: entryMgr, config: cfg,
- logger: logger.NewLogger("localExecutor").With(zap.String("job", job.Id)),
+ job: job,
+ entryMgr: entryMgr,
+ config: cfg,
+ results: map[string]any{},
+ logger: logger.NewLogger("localExecutor").With(zap.String("job", job.Id)),
}
})
return nil
@@ -49,6 +55,8 @@ type localExecutor struct {
entryPath string
entryMgr dentry.Manager
config LocalConfig
+ results map[string]any
+ resultMux sync.Mutex
logger *zap.SugaredLogger
}
@@ -72,17 +80,30 @@ func (b *localExecutor) Setup(ctx context.Context) (err error) {
return
}
b.logger.Infow("job setup", "workdir", b.workdir, "entryPath", b.entryPath)
+
return
}
func (b *localExecutor) DoOperation(ctx context.Context, step types.WorkflowJobStep) error {
- req := stub.NewRequest()
+ req := pluginapi.NewRequest()
req.WorkPath = b.workdir
req.EntryId = b.job.Target.EntryID
req.EntryPath = b.entryPath
+ req.Parameter = map[string]any{}
+ b.resultMux.Lock()
+ for k, v := range b.results {
+ req.Parameter[k] = v
+ }
+ b.resultMux.Unlock()
+ req.Parameter[pluginapi.ResEntryIdKey] = b.job.Target.EntryID
+ req.Parameter[pluginapi.ResEntryPathKey] = b.entryPath
+ req.Parameter[pluginapi.ResPluginName] = step.Plugin.PluginName
+ req.Parameter[pluginapi.ResPluginVersion] = step.Plugin.Version
+ req.Parameter[pluginapi.ResPluginType] = step.Plugin.PluginType
+ req.Parameter[pluginapi.ResPluginAction] = step.Plugin.Action
+
req.Action = step.Plugin.PluginName
- req.Parameter = step.Plugin.Parameters
resp, err := plugin.Call(ctx, *step.Plugin, req)
if err != nil {
return fmt.Errorf("plugin action error: %s", err)
@@ -90,12 +111,29 @@ func (b *localExecutor) DoOperation(ctx context.Context, step types.WorkflowJobS
if !resp.IsSucceed {
return fmt.Errorf("plugin action failed: %s", resp.Message)
}
+ if len(resp.Results) > 0 {
+ b.resultMux.Lock()
+ for k, v := range resp.Results {
+ b.results[k] = v
+ }
+ b.resultMux.Unlock()
+ }
return nil
}
func (b *localExecutor) Collect(ctx context.Context) error {
- //TODO implement me
- panic("implement me")
+ b.resultMux.Lock()
+ filename, needCollect := b.results[pluginapi.ResCollectManifest]
+ b.resultMux.Unlock()
+ if !needCollect {
+ return nil
+ }
+	f, err := os.Open(path.Join(b.workdir, filename.(string))) // NOTE(review): unchecked assertion panics if the manifest value is not a string — use `s, ok := filename.(string)` and return an error
+ if err != nil {
+ return fmt.Errorf("read collect manifest file failed: %s", err)
+ }
+ defer f.Close()
+	return nil // TODO(review): manifest file is opened but never parsed — document collection is not implemented yet
}
func (b *localExecutor) Teardown(ctx context.Context) {
diff --git a/pkg/workflow/mirrordir.go b/pkg/workflow/mirrordir.go
index 5345532e..0a93a560 100644
--- a/pkg/workflow/mirrordir.go
+++ b/pkg/workflow/mirrordir.go
@@ -21,7 +21,7 @@ import (
"fmt"
"github.com/basenana/nanafs/pkg/dentry"
"github.com/basenana/nanafs/pkg/plugin"
- "github.com/basenana/nanafs/pkg/plugin/stub"
+ "github.com/basenana/nanafs/pkg/plugin/pluginapi"
"github.com/basenana/nanafs/pkg/types"
"github.com/basenana/nanafs/pkg/workflow/jobrun"
"github.com/basenana/nanafs/utils"
@@ -121,7 +121,7 @@ func (d *dirHandler) IsGroup(ctx context.Context) (bool, error) {
return en.IsGroup, nil
}
-func (d *dirHandler) FindEntry(ctx context.Context, name string) (*stub.Entry, error) {
+func (d *dirHandler) FindEntry(ctx context.Context, name string) (*pluginapi.Entry, error) {
if d == nil {
return nil, types.ErrNoGroup
}
@@ -138,7 +138,7 @@ func (d *dirHandler) FindEntry(ctx context.Context, name string) (*stub.Entry, e
if d.dirKind == MirrorDirRoot {
switch name {
case MirrorDirWorkflows, MirrorDirJobs:
- return d.plugin.fs.CreateEntry(d.plugin.path, stub.EntryAttr{Name: name, Kind: types.ExternalGroupKind})
+ return d.plugin.fs.CreateEntry(d.plugin.path, pluginapi.EntryAttr{Name: name, Kind: types.ExternalGroupKind})
default:
return nil, types.ErrNotFound
}
@@ -149,7 +149,7 @@ func (d *dirHandler) FindEntry(ctx context.Context, name string) (*stub.Entry, e
if err != nil {
return nil, err
}
- return d.plugin.fs.CreateEntry(d.plugin.path, stub.EntryAttr{Name: name, Kind: types.RawKind})
+ return d.plugin.fs.CreateEntry(d.plugin.path, pluginapi.EntryAttr{Name: name, Kind: types.RawKind})
}
if d.dirKind == MirrorDirJobs {
@@ -158,7 +158,7 @@ func (d *dirHandler) FindEntry(ctx context.Context, name string) (*stub.Entry, e
if err != nil {
return nil, err
}
- return d.plugin.fs.CreateEntry(d.plugin.path, stub.EntryAttr{Name: name, Kind: types.ExternalGroupKind})
+ return d.plugin.fs.CreateEntry(d.plugin.path, pluginapi.EntryAttr{Name: name, Kind: types.ExternalGroupKind})
} else {
jobs, err := d.ListChildren(ctx)
if err != nil {
@@ -174,7 +174,7 @@ func (d *dirHandler) FindEntry(ctx context.Context, name string) (*stub.Entry, e
return nil, types.ErrNotFound
}
-func (d *dirHandler) CreateEntry(ctx context.Context, attr stub.EntryAttr) (*stub.Entry, error) {
+func (d *dirHandler) CreateEntry(ctx context.Context, attr pluginapi.EntryAttr) (*pluginapi.Entry, error) {
if d.dirKind == MirrorDirRoot {
return nil, types.ErrNoAccess
}
@@ -195,11 +195,11 @@ func (d *dirHandler) CreateEntry(ctx context.Context, attr stub.EntryAttr) (*stu
return en, nil
}
-func (d *dirHandler) UpdateEntry(ctx context.Context, en *stub.Entry) error {
+func (d *dirHandler) UpdateEntry(ctx context.Context, en *pluginapi.Entry) error {
return d.plugin.fs.UpdateEntry(d.plugin.path, en)
}
-func (d *dirHandler) RemoveEntry(ctx context.Context, en *stub.Entry) error {
+func (d *dirHandler) RemoveEntry(ctx context.Context, en *pluginapi.Entry) error {
if d == nil {
return types.ErrNoGroup
}
@@ -227,7 +227,7 @@ func (d *dirHandler) RemoveEntry(ctx context.Context, en *stub.Entry) error {
return nil
}
-func (d *dirHandler) ListChildren(ctx context.Context) ([]*stub.Entry, error) {
+func (d *dirHandler) ListChildren(ctx context.Context) ([]*pluginapi.Entry, error) {
if d == nil {
return nil, types.ErrNoGroup
}
@@ -236,7 +236,7 @@ func (d *dirHandler) ListChildren(ctx context.Context) ([]*stub.Entry, error) {
return nil, err
}
- children := make([]*stub.Entry, 0)
+ children := make([]*pluginapi.Entry, 0)
cachedChildMap := make(map[string]struct{})
for i, ch := range cachedChild {
cachedChildMap[ch.Name] = struct{}{}
@@ -247,7 +247,7 @@ func (d *dirHandler) ListChildren(ctx context.Context) ([]*stub.Entry, error) {
case d.dirKind == MirrorDirRoot:
if _, ok := cachedChildMap[MirrorDirJobs]; !ok {
- child, err := d.plugin.fs.CreateEntry(d.plugin.path, stub.EntryAttr{Name: MirrorDirJobs, Kind: types.ExternalGroupKind})
+ child, err := d.plugin.fs.CreateEntry(d.plugin.path, pluginapi.EntryAttr{Name: MirrorDirJobs, Kind: types.ExternalGroupKind})
if err != nil {
wfLogger.Errorf("init mirror dir %s error: %s", MirrorDirJobs, err)
return nil, err
@@ -256,7 +256,7 @@ func (d *dirHandler) ListChildren(ctx context.Context) ([]*stub.Entry, error) {
}
if _, ok := cachedChildMap[MirrorDirWorkflows]; !ok {
- child, err := d.plugin.fs.CreateEntry(d.plugin.path, stub.EntryAttr{Name: MirrorDirWorkflows, Kind: types.ExternalGroupKind})
+ child, err := d.plugin.fs.CreateEntry(d.plugin.path, pluginapi.EntryAttr{Name: MirrorDirWorkflows, Kind: types.ExternalGroupKind})
if err != nil {
wfLogger.Errorf("init mirror dir %s error: %s", MirrorDirWorkflows, err)
return nil, err
@@ -271,7 +271,7 @@ func (d *dirHandler) ListChildren(ctx context.Context) ([]*stub.Entry, error) {
}
for _, wf := range wfList {
if _, ok := cachedChildMap[id2MirrorFile(wf.Id)]; !ok {
- child, err := d.plugin.fs.CreateEntry(d.plugin.path, stub.EntryAttr{Name: id2MirrorFile(wf.Id), Kind: types.RawKind})
+ child, err := d.plugin.fs.CreateEntry(d.plugin.path, pluginapi.EntryAttr{Name: id2MirrorFile(wf.Id), Kind: types.RawKind})
if err != nil {
wfLogger.Errorf("init mirror workflow file %s error: %s", id2MirrorFile(wf.Id), err)
return nil, err
@@ -286,7 +286,7 @@ func (d *dirHandler) ListChildren(ctx context.Context) ([]*stub.Entry, error) {
}
for _, wf := range wfList {
if _, ok := cachedChildMap[wf.Id]; !ok {
- child, err := d.plugin.fs.CreateEntry(d.plugin.path, stub.EntryAttr{Name: wf.Id, Kind: types.ExternalGroupKind})
+ child, err := d.plugin.fs.CreateEntry(d.plugin.path, pluginapi.EntryAttr{Name: wf.Id, Kind: types.ExternalGroupKind})
if err != nil {
wfLogger.Errorf("init mirror jobs workflow group %s error: %s", wf.Id, err)
return nil, err
@@ -301,7 +301,7 @@ func (d *dirHandler) ListChildren(ctx context.Context) ([]*stub.Entry, error) {
}
for _, j := range jobList {
if _, ok := cachedChildMap[id2MirrorFile(j.Id)]; !ok {
- child, err := d.plugin.fs.CreateEntry(d.plugin.path, stub.EntryAttr{Name: id2MirrorFile(j.Id), Kind: types.RawKind})
+ child, err := d.plugin.fs.CreateEntry(d.plugin.path, pluginapi.EntryAttr{Name: id2MirrorFile(j.Id), Kind: types.RawKind})
if err != nil {
wfLogger.Errorf("init mirror job file %s error: %s", id2MirrorFile(j.Id), err)
return nil, err
@@ -368,7 +368,7 @@ func (f *fileHandler) Close(ctx context.Context) error {
return nil
}
-func (f *fileHandler) createOrUpdateWorkflow(ctx context.Context, en *stub.Entry) error {
+func (f *fileHandler) createOrUpdateWorkflow(ctx context.Context, en *pluginapi.Entry) error {
wf := &types.WorkflowSpec{}
decodeErr := yaml.NewDecoder(&memfsFile{filePath: f.plugin.path, entry: en, memfs: f.plugin.fs}).Decode(wf)
if decodeErr != nil {
@@ -413,7 +413,7 @@ func (f *fileHandler) createOrUpdateWorkflow(ctx context.Context, en *stub.Entry
return nil
}
-func (f *fileHandler) triggerOrUpdateWorkflowJob(ctx context.Context, en *stub.Entry) error {
+func (f *fileHandler) triggerOrUpdateWorkflowJob(ctx context.Context, en *pluginapi.Entry) error {
wfJob := &types.WorkflowJob{}
decodeErr := yaml.NewDecoder(&memfsFile{filePath: f.plugin.path, entry: en, memfs: f.plugin.fs}).Decode(wfJob)
if decodeErr != nil {
@@ -469,7 +469,7 @@ func (f *fileHandler) triggerOrUpdateWorkflowJob(ctx context.Context, en *stub.E
type memfsFile struct {
filePath string
- entry *stub.Entry
+ entry *pluginapi.Entry
memfs *plugin.MemFS
off int64
}
@@ -490,12 +490,12 @@ func buildWorkflowMirrorPlugin(mgr Manager) plugin.Builder {
mp := &MirrorPlugin{path: "/", fs: plugin.NewMemFS(), mgr: mgr}
mp.dirHandler = &dirHandler{plugin: mp, dirKind: MirrorDirRoot}
- _, _ = mp.fs.CreateEntry("/", stub.EntryAttr{
+ _, _ = mp.fs.CreateEntry("/", pluginapi.EntryAttr{
Name: MirrorDirJobs,
Kind: types.ExternalGroupKind,
})
- _, _ = mp.fs.CreateEntry("/", stub.EntryAttr{
+ _, _ = mp.fs.CreateEntry("/", pluginapi.EntryAttr{
Name: MirrorDirWorkflows,
Kind: types.ExternalGroupKind,
})
diff --git a/vendor/github.com/PuerkitoBio/goquery/.gitattributes b/vendor/github.com/PuerkitoBio/goquery/.gitattributes
new file mode 100644
index 00000000..0cc26ec0
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/.gitattributes
@@ -0,0 +1 @@
+testdata/* linguist-vendored
diff --git a/vendor/github.com/PuerkitoBio/goquery/.gitignore b/vendor/github.com/PuerkitoBio/goquery/.gitignore
new file mode 100644
index 00000000..970381cd
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/.gitignore
@@ -0,0 +1,16 @@
+# editor temporary files
+*.sublime-*
+.DS_Store
+*.swp
+#*.*#
+tags
+
+# direnv config
+.env*
+
+# test binaries
+*.test
+
+# coverage and profilte outputs
+*.out
+
diff --git a/vendor/github.com/PuerkitoBio/goquery/LICENSE b/vendor/github.com/PuerkitoBio/goquery/LICENSE
new file mode 100644
index 00000000..25372c2b
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/LICENSE
@@ -0,0 +1,12 @@
+Copyright (c) 2012-2021, Martin Angers & Contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md
new file mode 100644
index 00000000..582ccac9
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/README.md
@@ -0,0 +1,198 @@
+# goquery - a little like that j-thing, only in Go
+
+[![Build Status](https://github.com/PuerkitoBio/goquery/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/PuerkitoBio/goquery/actions)
+[![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery)
+[![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
+
+goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
+
+Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML. See the [wiki][] for various options to do this.
+
+Syntax-wise, it is as close as possible to jQuery, with the same function names when possible, and that warm and fuzzy chainable interface. jQuery being the ultra-popular library that it is, I felt that writing a similar HTML-manipulating library was better to follow its API than to start anew (in the same spirit as Go's `fmt` package), even though some of its methods are less than intuitive (looking at you, [index()][index]...).
+
+## Table of Contents
+
+* [Installation](#installation)
+* [Changelog](#changelog)
+* [API](#api)
+* [Examples](#examples)
+* [Related Projects](#related-projects)
+* [Support](#support)
+* [License](#license)
+
+## Installation
+
+Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+.
+
+ $ go get github.com/PuerkitoBio/goquery
+
+(optional) To run unit tests:
+
+ $ cd $GOPATH/src/github.com/PuerkitoBio/goquery
+ $ go test
+
+(optional) To run benchmarks (warning: it runs for a few minutes):
+
+ $ cd $GOPATH/src/github.com/PuerkitoBio/goquery
+ $ go test -bench=".*"
+
+## Changelog
+
+**Note that goquery's API is now stable, and will not break.**
+
+* **2023-02-18 (v1.8.1)** : Update `go.mod` dependencies, update CI workflow.
+* **2021-10-25 (v1.8.0)** : Add `Render` function to render a `Selection` to an `io.Writer` (thanks [@anthonygedeon](https://github.com/anthonygedeon)).
+* **2021-07-11 (v1.7.1)** : Update go.mod dependencies and add dependabot config (thanks [@jauderho](https://github.com/jauderho)).
+* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)).
+* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes.
+* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this.
+* **2020-02-04 (v1.5.1)** : Update module dependencies.
+* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
+* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
+* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
+* **2018-01-28 (v1.3.0)** : Add `ToEnd` constant to `Slice` until the end of the selection (thanks to @davidjwilkins for raising the issue).
+* **2018-01-11 (v1.2.0)** : Add `AddBack*` and deprecate `AndSelf` (thanks to @davidjwilkins).
+* **2017-02-12 (v1.1.0)** : Add `SetHtml` and `SetText` (thanks to @glebtv).
+* **2016-12-29 (v1.0.2)** : Optimize allocations for `Selection.Text` (thanks to @radovskyb).
+* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
+* **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
+* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
+* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
+* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
+* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
+* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
+* **2014-11-07** : Add manipulation functions (thanks to [Andrew Stone][thatguystone]) and `*Matcher` functions, that receive compiled cascadia selectors instead of selector strings, thus avoiding potential panics thrown by goquery via `cascadia.MustCompile` calls. This results in better performance (selectors can be compiled once and reused) and more idiomatic error handling (you can handle cascadia's compilation errors, instead of recovering from panics, which had been bugging me for a long time). Note that the actual type expected is a `Matcher` interface, that `cascadia.Selector` implements. Other matcher implementations could be used.
+* **2014-11-06** : Change import paths of net/html to golang.org/x/net/html (see https://groups.google.com/forum/#!topic/golang-nuts/eD8dh3T9yyA). Make sure to update your code to use the new import path too when you call goquery with `html.Node`s.
+* **v0.3.2** : Add `NewDocumentFromReader()` (thanks jweir) which allows creating a goquery document from an io.Reader.
+* **v0.3.1** : Add `NewDocumentFromResponse()` (thanks assassingj) which allows creating a goquery document from an http response.
+* **v0.3.0** : Add `EachWithBreak()` which allows to break out of an `Each()` loop by returning false. This function was added instead of changing the existing `Each()` to avoid breaking compatibility.
+* **v0.2.1** : Make go-getable, now that [go.net/html is Go1.0-compatible][gonet] (thanks to @matrixik for pointing this out).
+* **v0.2.0** : Add support for negative indices in Slice(). **BREAKING CHANGE** `Document.Root` is removed, `Document` is now a `Selection` itself (a selection of one, the root element, just like `Document.Root` was before). Add jQuery's Closest() method.
+* **v0.1.1** : Add benchmarks to use as baseline for refactorings, refactor Next...() and Prev...() methods to use the new html package's linked list features (Next/PrevSibling, FirstChild). Good performance boost (40+% in some cases).
+* **v0.1.0** : Initial release.
+
+## API
+
+goquery exposes two structs, `Document` and `Selection`, and the `Matcher` interface. Unlike jQuery, which is loaded as part of a DOM document, and thus acts on its containing document, goquery doesn't know which HTML document to act upon. So it needs to be told, and that's what the `Document` type is for. It holds the root document node as the initial Selection value to manipulate.
+
+jQuery often has many variants for the same function (no argument, a selector string argument, a jQuery object argument, a DOM element argument, ...). Instead of exposing the same features in goquery as a single method with variadic empty interface arguments, statically-typed signatures are used following this naming convention:
+
+* When the jQuery equivalent can be called with no argument, it has the same name as jQuery for the no argument signature (e.g.: `Prev()`), and the version with a selector string argument is called `XxxFiltered()` (e.g.: `PrevFiltered()`)
+* When the jQuery equivalent **requires** one argument, the same name as jQuery is used for the selector string version (e.g.: `Is()`)
+* The signatures accepting a jQuery object as argument are defined in goquery as `XxxSelection()` and take a `*Selection` object as argument (e.g.: `FilterSelection()`)
+* The signatures accepting a DOM element as argument in jQuery are defined in goquery as `XxxNodes()` and take a variadic argument of type `*html.Node` (e.g.: `FilterNodes()`)
+* The signatures accepting a function as argument in jQuery are defined in goquery as `XxxFunction()` and take a function as argument (e.g.: `FilterFunction()`)
+* The goquery methods that can be called with a selector string have a corresponding version that take a `Matcher` interface and are defined as `XxxMatcher()` (e.g.: `IsMatcher()`)
+
+Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
+
+The complete [package reference documentation can be found here][doc].
+
+Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
+
+* `Find("~")` returns an empty selection because the selector string doesn't match anything.
+* `Add("~")` returns a new selection that holds the same nodes as the original selection, because it didn't add any node (selector string didn't match anything).
+* `ParentsFiltered("~")` returns an empty selection because the selector string doesn't match anything.
+* `ParentsUntil("~")` returns all parents of the selection because the selector string didn't match any element to stop before the top element.
+
+## Examples
+
+See some tips and tricks in the [wiki][].
+
+Adapted from example_test.go:
+
+```Go
+package main
+
+import (
+ "fmt"
+ "log"
+ "net/http"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
+func ExampleScrape() {
+ // Request the HTML page.
+ res, err := http.Get("http://metalsucks.net")
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer res.Body.Close()
+ if res.StatusCode != 200 {
+ log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
+ }
+
+ // Load the HTML document
+ doc, err := goquery.NewDocumentFromReader(res.Body)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ // Find the review items
+ doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) {
+ // For each item found, get the title
+ title := s.Find("a").Text()
+ fmt.Printf("Review %d: %s\n", i, title)
+ })
+}
+
+func main() {
+ ExampleScrape()
+}
+```
+
+## Related Projects
+
+- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
+- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
+- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
+- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework
+- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
+- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
+- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins
+- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers.
+- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering.
+- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags.
+- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates.
+- [goskyr](https://github.com/jakopako/goskyr), an easily configurable command-line scraper written in Go.
+- [goGetJS](https://github.com/davemolk/goGetJS), a tool for extracting, searching, and saving JavaScript files (with optional headless browser).
+
+## Support
+
+There are a number of ways you can support the project:
+
+* Use it, star it, build something with it, spread the word!
+ - If you do build something open-source or otherwise publicly-visible, let me know so I can add it to the [Related Projects](#related-projects) section!
+* Raise issues to improve the project (note: doc typos and clarifications are issues too!)
+ - Please search existing issues before opening a new one - it may have already been adressed.
+* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
+ - Make sure new code is tested.
+ - Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
+* Sponsor the developer
+ - See the Github Sponsor button at the top of the repo on github
+ - or via BuyMeACoffee.com, below
+
+
+
+## License
+
+The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia's license is [here][caslic].
+
+[jquery]: http://jquery.com/
+[go]: http://golang.org/
+[cascadia]: https://github.com/andybalholm/cascadia
+[cascadiacli]: https://github.com/suntong/cascadia
+[bsd]: http://opensource.org/licenses/BSD-3-Clause
+[golic]: http://golang.org/LICENSE
+[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
+[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery
+[index]: http://api.jquery.com/index/
+[gonet]: https://github.com/golang/net/
+[html]: https://pkg.go.dev/golang.org/x/net/html
+[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
+[thatguystone]: https://github.com/thatguystone
+[piotr]: https://github.com/piotrkowalczuk
+[goq]: https://github.com/andrewstuart/goq
+[thiemok]: https://github.com/thiemok
+[djw]: https://github.com/davidjwilkins
diff --git a/vendor/github.com/PuerkitoBio/goquery/array.go b/vendor/github.com/PuerkitoBio/goquery/array.go
new file mode 100644
index 00000000..1b1f6cbe
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/array.go
@@ -0,0 +1,124 @@
+package goquery
+
+import (
+ "golang.org/x/net/html"
+)
+
+const (
+ maxUint = ^uint(0)
+ maxInt = int(maxUint >> 1)
+
+ // ToEnd is a special index value that can be used as end index in a call
+ // to Slice so that all elements are selected until the end of the Selection.
+ // It is equivalent to passing (*Selection).Length().
+ ToEnd = maxInt
+)
+
+// First reduces the set of matched elements to the first in the set.
+// It returns a new Selection object, and an empty Selection object if the
+// the selection is empty.
+func (s *Selection) First() *Selection {
+ return s.Eq(0)
+}
+
+// Last reduces the set of matched elements to the last in the set.
+// It returns a new Selection object, and an empty Selection object if
+// the selection is empty.
+func (s *Selection) Last() *Selection {
+ return s.Eq(-1)
+}
+
+// Eq reduces the set of matched elements to the one at the specified index.
+// If a negative index is given, it counts backwards starting at the end of the
+// set. It returns a new Selection object, and an empty Selection object if the
+// index is invalid.
+func (s *Selection) Eq(index int) *Selection {
+ if index < 0 {
+ index += len(s.Nodes)
+ }
+
+ if index >= len(s.Nodes) || index < 0 {
+ return newEmptySelection(s.document)
+ }
+
+ return s.Slice(index, index+1)
+}
+
+// Slice reduces the set of matched elements to a subset specified by a range
+// of indices. The start index is 0-based and indicates the index of the first
+// element to select. The end index is 0-based and indicates the index at which
+// the elements stop being selected (the end index is not selected).
+//
+// The indices may be negative, in which case they represent an offset from the
+// end of the selection.
+//
+// The special value ToEnd may be specified as end index, in which case all elements
+// until the end are selected. This works both for a positive and negative start
+// index.
+func (s *Selection) Slice(start, end int) *Selection {
+ if start < 0 {
+ start += len(s.Nodes)
+ }
+ if end == ToEnd {
+ end = len(s.Nodes)
+ } else if end < 0 {
+ end += len(s.Nodes)
+ }
+ return pushStack(s, s.Nodes[start:end])
+}
+
+// Get retrieves the underlying node at the specified index.
+// Get without parameter is not implemented, since the node array is available
+// on the Selection object.
+func (s *Selection) Get(index int) *html.Node {
+ if index < 0 {
+ index += len(s.Nodes) // Negative index gets from the end
+ }
+ return s.Nodes[index]
+}
+
+// Index returns the position of the first element within the Selection object
+// relative to its sibling elements.
+func (s *Selection) Index() int {
+ if len(s.Nodes) > 0 {
+ return newSingleSelection(s.Nodes[0], s.document).PrevAll().Length()
+ }
+ return -1
+}
+
+// IndexSelector returns the position of the first element within the
+// Selection object relative to the elements matched by the selector, or -1 if
+// not found.
+func (s *Selection) IndexSelector(selector string) int {
+ if len(s.Nodes) > 0 {
+ sel := s.document.Find(selector)
+ return indexInSlice(sel.Nodes, s.Nodes[0])
+ }
+ return -1
+}
+
+// IndexMatcher returns the position of the first element within the
+// Selection object relative to the elements matched by the matcher, or -1 if
+// not found.
+func (s *Selection) IndexMatcher(m Matcher) int {
+ if len(s.Nodes) > 0 {
+ sel := s.document.FindMatcher(m)
+ return indexInSlice(sel.Nodes, s.Nodes[0])
+ }
+ return -1
+}
+
+// IndexOfNode returns the position of the specified node within the Selection
+// object, or -1 if not found.
+func (s *Selection) IndexOfNode(node *html.Node) int {
+ return indexInSlice(s.Nodes, node)
+}
+
+// IndexOfSelection returns the position of the first node in the specified
+// Selection object within this Selection object, or -1 if not found.
+func (s *Selection) IndexOfSelection(sel *Selection) int {
+ if sel != nil && len(sel.Nodes) > 0 {
+ return indexInSlice(s.Nodes, sel.Nodes[0])
+ }
+ return -1
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/doc.go b/vendor/github.com/PuerkitoBio/goquery/doc.go
new file mode 100644
index 00000000..71146a78
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/doc.go
@@ -0,0 +1,123 @@
+// Copyright (c) 2012-2016, Martin Angers & Contributors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation and/or
+// other materials provided with the distribution.
+// * Neither the name of the author nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
+// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/*
+Package goquery implements features similar to jQuery, including the chainable
+syntax, to manipulate and query an HTML document.
+
+It brings a syntax and a set of features similar to jQuery to the Go language.
+It is based on Go's net/html package and the CSS Selector library cascadia.
+Since the net/html parser returns nodes, and not a full-featured DOM
+tree, jQuery's stateful manipulation functions (like height(), css(), detach())
+have been left off.
+
+Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is
+the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML.
+See the repository's wiki for various options on how to do this.
+
+Syntax-wise, it is as close as possible to jQuery, with the same method names when
+possible, and that warm and fuzzy chainable interface. jQuery being the
+ultra-popular library that it is, writing a similar HTML-manipulating
+library was better to follow its API than to start anew (in the same spirit as
+Go's fmt package), even though some of its methods are less than intuitive (looking
+at you, index()...).
+
+It is hosted on GitHub, along with additional documentation in the README.md
+file: https://github.com/puerkitobio/goquery
+
+Please note that because of the net/html dependency, goquery requires Go1.1+.
+
+The various methods are split into files based on the category of behavior.
+The three dots (...) indicate that various "overloads" are available.
+
+* array.go : array-like positional manipulation of the selection.
+ - Eq()
+ - First()
+ - Get()
+ - Index...()
+ - Last()
+ - Slice()
+
+* expand.go : methods that expand or augment the selection's set.
+ - Add...()
+ - AndSelf()
+ - Union(), which is an alias for AddSelection()
+
+* filter.go : filtering methods, that reduce the selection's set.
+ - End()
+ - Filter...()
+ - Has...()
+ - Intersection(), which is an alias of FilterSelection()
+ - Not...()
+
+* iteration.go : methods to loop over the selection's nodes.
+ - Each()
+ - EachWithBreak()
+ - Map()
+
+* manipulation.go : methods for modifying the document
+ - After...()
+ - Append...()
+ - Before...()
+ - Clone()
+ - Empty()
+ - Prepend...()
+ - Remove...()
+ - ReplaceWith...()
+ - Unwrap()
+ - Wrap...()
+ - WrapAll...()
+ - WrapInner...()
+
+* property.go : methods that inspect and get the node's properties values.
+ - Attr*(), RemoveAttr(), SetAttr()
+ - AddClass(), HasClass(), RemoveClass(), ToggleClass()
+ - Html()
+ - Length()
+ - Size(), which is an alias for Length()
+ - Text()
+
+* query.go : methods that query, or reflect, a node's identity.
+ - Contains()
+ - Is...()
+
+* traversal.go : methods to traverse the HTML document tree.
+ - Children...()
+ - Contents()
+ - Find...()
+ - Next...()
+ - Parent[s]...()
+ - Prev...()
+ - Siblings...()
+
+* type.go : definition of the types exposed by goquery.
+ - Document
+ - Selection
+ - Matcher
+
+* utilities.go : definition of helper functions (and not methods on a *Selection)
+that are not part of jQuery, but are useful to goquery.
+ - NodeName
+ - OuterHtml
+*/
+package goquery
diff --git a/vendor/github.com/PuerkitoBio/goquery/expand.go b/vendor/github.com/PuerkitoBio/goquery/expand.go
new file mode 100644
index 00000000..7caade53
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/expand.go
@@ -0,0 +1,70 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+// Add adds the selector string's matching nodes to those in the current
+// selection and returns a new Selection object.
+// The selector string is run in the context of the document of the current
+// Selection object.
+func (s *Selection) Add(selector string) *Selection {
+ return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, compileMatcher(selector))...)
+}
+
+// AddMatcher adds the matcher's matching nodes to those in the current
+// selection and returns a new Selection object.
+// The matcher is run in the context of the document of the current
+// Selection object.
+func (s *Selection) AddMatcher(m Matcher) *Selection {
+ return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, m)...)
+}
+
+// AddSelection adds the specified Selection object's nodes to those in the
+// current selection and returns a new Selection object.
+func (s *Selection) AddSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return s.AddNodes()
+ }
+ return s.AddNodes(sel.Nodes...)
+}
+
+// Union is an alias for AddSelection.
+func (s *Selection) Union(sel *Selection) *Selection {
+ return s.AddSelection(sel)
+}
+
+// AddNodes adds the specified nodes to those in the
+// current selection and returns a new Selection object.
+func (s *Selection) AddNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, appendWithoutDuplicates(s.Nodes, nodes, nil))
+}
+
+// AndSelf adds the previous set of elements on the stack to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the previous one.
+// Deprecated: This function has been deprecated and is now an alias for AddBack().
+func (s *Selection) AndSelf() *Selection {
+ return s.AddBack()
+}
+
+// AddBack adds the previous set of elements on the stack to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the previous one.
+func (s *Selection) AddBack() *Selection {
+ return s.AddSelection(s.prevSel)
+}
+
+// AddBackFiltered reduces the previous set of elements on the stack to those that
+// match the selector string, and adds them to the current set.
+// It returns a new Selection object containing the current Selection combined
+// with the filtered previous one
+func (s *Selection) AddBackFiltered(selector string) *Selection {
+ return s.AddSelection(s.prevSel.Filter(selector))
+}
+
+// AddBackMatcher reduces the previous set of elements on the stack to those that match
+// the mateher, and adds them to the curernt set.
+// It returns a new Selection object containing the current Selection combined
+// with the filtered previous one
+func (s *Selection) AddBackMatcher(m Matcher) *Selection {
+ return s.AddSelection(s.prevSel.FilterMatcher(m))
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/filter.go b/vendor/github.com/PuerkitoBio/goquery/filter.go
new file mode 100644
index 00000000..9138ffb3
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/filter.go
@@ -0,0 +1,163 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+// Filter reduces the set of matched elements to those that match the selector string.
+// It returns a new Selection object for this subset of matching elements.
+func (s *Selection) Filter(selector string) *Selection {
+ return s.FilterMatcher(compileMatcher(selector))
+}
+
+// FilterMatcher reduces the set of matched elements to those that match
+// the given matcher. It returns a new Selection object for this subset
+// of matching elements.
+func (s *Selection) FilterMatcher(m Matcher) *Selection {
+ return pushStack(s, winnow(s, m, true))
+}
+
+// Not removes elements from the Selection that match the selector string.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) Not(selector string) *Selection {
+ return s.NotMatcher(compileMatcher(selector))
+}
+
+// NotMatcher removes elements from the Selection that match the given matcher.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotMatcher(m Matcher) *Selection {
+ return pushStack(s, winnow(s, m, false))
+}
+
+// FilterFunction reduces the set of matched elements to those that pass the function's test.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterFunction(f func(int, *Selection) bool) *Selection {
+ return pushStack(s, winnowFunction(s, f, true))
+}
+
+// NotFunction removes elements from the Selection that pass the function's test.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotFunction(f func(int, *Selection) bool) *Selection {
+ return pushStack(s, winnowFunction(s, f, false))
+}
+
+// FilterNodes reduces the set of matched elements to those that match the specified nodes.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, winnowNodes(s, nodes, true))
+}
+
+// NotNodes removes elements from the Selection that match the specified nodes.
+// It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotNodes(nodes ...*html.Node) *Selection {
+ return pushStack(s, winnowNodes(s, nodes, false))
+}
+
+// FilterSelection reduces the set of matched elements to those that match a
+// node in the specified Selection object.
+// It returns a new Selection object for this subset of elements.
+func (s *Selection) FilterSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return pushStack(s, winnowNodes(s, nil, true))
+ }
+ return pushStack(s, winnowNodes(s, sel.Nodes, true))
+}
+
+// NotSelection removes elements from the Selection that match a node in the specified
+// Selection object. It returns a new Selection object with the matching elements removed.
+func (s *Selection) NotSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return pushStack(s, winnowNodes(s, nil, false))
+ }
+ return pushStack(s, winnowNodes(s, sel.Nodes, false))
+}
+
+// Intersection is an alias for FilterSelection.
+func (s *Selection) Intersection(sel *Selection) *Selection {
+ return s.FilterSelection(sel)
+}
+
+// Has reduces the set of matched elements to those that have a descendant
+// that matches the selector.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) Has(selector string) *Selection {
+ return s.HasSelection(s.document.Find(selector))
+}
+
+// HasMatcher reduces the set of matched elements to those that have a descendant
+// that matches the matcher.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasMatcher(m Matcher) *Selection {
+ return s.HasSelection(s.document.FindMatcher(m))
+}
+
+// HasNodes reduces the set of matched elements to those that have a
+// descendant that matches one of the nodes.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasNodes(nodes ...*html.Node) *Selection {
+ return s.FilterFunction(func(_ int, sel *Selection) bool {
+ // Add all nodes that contain one of the specified nodes
+ for _, n := range nodes {
+ if sel.Contains(n) {
+ return true
+ }
+ }
+ return false
+ })
+}
+
+// HasSelection reduces the set of matched elements to those that have a
+// descendant that matches one of the nodes of the specified Selection object.
+// It returns a new Selection object with the matching elements.
+func (s *Selection) HasSelection(sel *Selection) *Selection {
+ if sel == nil {
+ return s.HasNodes()
+ }
+ return s.HasNodes(sel.Nodes...)
+}
+
+// End ends the most recent filtering operation in the current chain and
+// returns the set of matched elements to its previous state.
+func (s *Selection) End() *Selection {
+ if s.prevSel != nil {
+ return s.prevSel
+ }
+ return newEmptySelection(s.document)
+}
+
+// Filter based on the matcher, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnow(sel *Selection, m Matcher, keep bool) []*html.Node {
+ // Optimize if keep is requested
+ if keep {
+ return m.Filter(sel.Nodes)
+ }
+ // Use grep
+ return grep(sel, func(i int, s *Selection) bool {
+ return !m.Match(s.Get(0))
+ })
+}
+
+// Filter based on an array of nodes, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnowNodes(sel *Selection, nodes []*html.Node, keep bool) []*html.Node {
+ if len(nodes)+len(sel.Nodes) < minNodesForSet {
+ return grep(sel, func(i int, s *Selection) bool {
+ return isInSlice(nodes, s.Get(0)) == keep
+ })
+ }
+
+ set := make(map[*html.Node]bool)
+ for _, n := range nodes {
+ set[n] = true
+ }
+ return grep(sel, func(i int, s *Selection) bool {
+ return set[s.Get(0)] == keep
+ })
+}
+
+// Filter based on a function test, and the indicator to keep (Filter) or
+// to get rid of (Not) the matching elements.
+func winnowFunction(sel *Selection, f func(int, *Selection) bool, keep bool) []*html.Node {
+ return grep(sel, func(i int, s *Selection) bool {
+ return f(i, s) == keep
+ })
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/iteration.go b/vendor/github.com/PuerkitoBio/goquery/iteration.go
new file mode 100644
index 00000000..e246f2e0
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/iteration.go
@@ -0,0 +1,39 @@
+package goquery
+
+// Each iterates over a Selection object, executing a function for each
+// matched element. It returns the current Selection object. The function
+// f is called for each element in the selection with the index of the
+// element in that selection starting at 0, and a *Selection that contains
+// only that element.
+func (s *Selection) Each(f func(int, *Selection)) *Selection {
+ for i, n := range s.Nodes {
+ f(i, newSingleSelection(n, s.document))
+ }
+ return s
+}
+
+// EachWithBreak iterates over a Selection object, executing a function for each
+// matched element. It is identical to Each except that it is possible to break
+// out of the loop by returning false in the callback function. It returns the
+// current Selection object.
+func (s *Selection) EachWithBreak(f func(int, *Selection) bool) *Selection {
+ for i, n := range s.Nodes {
+ if !f(i, newSingleSelection(n, s.document)) {
+ return s
+ }
+ }
+ return s
+}
+
+// Map passes each element in the current matched set through a function,
+// producing a slice of string holding the returned values. The function
+// f is called for each element in the selection with the index of the
+// element in that selection starting at 0, and a *Selection that contains
+// only that element.
+func (s *Selection) Map(f func(int, *Selection) string) (result []string) {
+ for i, n := range s.Nodes {
+ result = append(result, f(i, newSingleSelection(n, s.document)))
+ }
+
+ return result
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
new file mode 100644
index 00000000..35febf11
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/manipulation.go
@@ -0,0 +1,679 @@
+package goquery
+
+import (
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+// After applies the selector from the root document and inserts the matched elements
+// after the elements in the set of matched elements.
+//
+// If one of the matched elements in the selection is not currently in the
+// document, it's impossible to insert nodes after it, so it will be ignored.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) After(selector string) *Selection {
+ return s.AfterMatcher(compileMatcher(selector))
+}
+
+// AfterMatcher applies the matcher from the root document and inserts the matched elements
+// after the elements in the set of matched elements.
+//
+// If one of the matched elements in the selection is not currently in the
+// document, it's impossible to insert nodes after it, so it will be ignored.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterMatcher(m Matcher) *Selection {
+ return s.AfterNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// AfterSelection inserts the elements in the selection after each element in the set of matched
+// elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterSelection(sel *Selection) *Selection {
+ return s.AfterNodes(sel.Nodes...)
+}
+
+// AfterHtml parses the html and inserts it after the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ nextSibling := node.NextSibling
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, nextSibling)
+ }
+ }
+ })
+}
+
+// AfterNodes inserts the nodes after each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AfterNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) {
+ if sn.Parent != nil {
+ sn.Parent.InsertBefore(n, sn.NextSibling)
+ }
+ })
+}
+
+// Append appends the elements specified by the selector to the end of each element
+// in the set of matched elements, following those rules:
+//
+// 1) The selector is applied to the root document.
+//
+// 2) Elements that are part of the document will be moved to the new location.
+//
+// 3) If there are multiple locations to append to, cloned nodes will be
+// appended to all target locations except the last one, which will be moved
+// as noted in (2).
+func (s *Selection) Append(selector string) *Selection {
+ return s.AppendMatcher(compileMatcher(selector))
+}
+
+// AppendMatcher appends the elements specified by the matcher to the end of each element
+// in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendMatcher(m Matcher) *Selection {
+ return s.AppendNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// AppendSelection appends the elements in the selection to the end of each element
+// in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendSelection(sel *Selection) *Selection {
+ return s.AppendNodes(sel.Nodes...)
+}
+
+// AppendHtml parses the html and appends it to the set of matched elements.
+func (s *Selection) AppendHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ node.AppendChild(n)
+ }
+ })
+}
+
+// AppendNodes appends the specified nodes to each node in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) AppendNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) {
+ sn.AppendChild(n)
+ })
+}
+
+// Before inserts the matched elements before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) Before(selector string) *Selection {
+ return s.BeforeMatcher(compileMatcher(selector))
+}
+
+// BeforeMatcher inserts the matched elements before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeMatcher(m Matcher) *Selection {
+ return s.BeforeNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// BeforeSelection inserts the elements in the selection before each element in the set of matched
+// elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeSelection(sel *Selection) *Selection {
+ return s.BeforeNodes(sel.Nodes...)
+}
+
+// BeforeHtml parses the html and inserts it before the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, node)
+ }
+ }
+ })
+}
+
+// BeforeNodes inserts the nodes before each element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) BeforeNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) {
+ if sn.Parent != nil {
+ sn.Parent.InsertBefore(n, sn)
+ }
+ })
+}
+
+// Clone creates a deep copy of the set of matched nodes. The new nodes will not be
+// attached to the document.
+func (s *Selection) Clone() *Selection {
+ ns := newEmptySelection(s.document)
+ ns.Nodes = cloneNodes(s.Nodes)
+ return ns
+}
+
+// Empty removes all children nodes from the set of matched elements.
+// It returns the children nodes in a new Selection.
+func (s *Selection) Empty() *Selection {
+ var nodes []*html.Node
+
+ for _, n := range s.Nodes {
+ for c := n.FirstChild; c != nil; c = n.FirstChild {
+ n.RemoveChild(c)
+ nodes = append(nodes, c)
+ }
+ }
+
+ return pushStack(s, nodes)
+}
+
+// Prepend prepends the elements specified by the selector to each element in
+// the set of matched elements, following the same rules as Append.
+func (s *Selection) Prepend(selector string) *Selection {
+ return s.PrependMatcher(compileMatcher(selector))
+}
+
+// PrependMatcher prepends the elements specified by the matcher to each
+// element in the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependMatcher(m Matcher) *Selection {
+ return s.PrependNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// PrependSelection prepends the elements in the selection to each element in
+// the set of matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependSelection(sel *Selection) *Selection {
+ return s.PrependNodes(sel.Nodes...)
+}
+
+// PrependHtml parses the html and prepends it to the set of matched elements.
+func (s *Selection) PrependHtml(htmlStr string) *Selection {
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ firstChild := node.FirstChild
+ for _, n := range nodes {
+ node.InsertBefore(n, firstChild)
+ }
+ })
+}
+
+// PrependNodes prepends the specified nodes to each node in the set of
+// matched elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) PrependNodes(ns ...*html.Node) *Selection {
+ return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) {
+ // sn.FirstChild may be nil, in which case this functions like
+ // sn.AppendChild()
+ sn.InsertBefore(n, sn.FirstChild)
+ })
+}
+
+// Remove removes the set of matched elements from the document.
+// It returns the same selection, now consisting of nodes not in the document.
+func (s *Selection) Remove() *Selection {
+ for _, n := range s.Nodes {
+ if n.Parent != nil {
+ n.Parent.RemoveChild(n)
+ }
+ }
+
+ return s
+}
+
+// RemoveFiltered removes from the current set of matched elements those that
+// match the selector filter. It returns the Selection of removed nodes.
+//
+// For example if the selection s contains "<h1>", "<h2>" and "<h3>"
+// and s.RemoveFiltered("h2") is called, only the "<h2>" node is removed
+// (and returned), while "<h1>" and "<h3>" are kept in the document.
+func (s *Selection) RemoveFiltered(selector string) *Selection {
+ return s.RemoveMatcher(compileMatcher(selector))
+}
+
+// RemoveMatcher removes from the current set of matched elements those that
+// match the Matcher filter. It returns the Selection of removed nodes.
+// See RemoveFiltered for additional information.
+func (s *Selection) RemoveMatcher(m Matcher) *Selection {
+ return s.FilterMatcher(m).Remove()
+}
+
+// ReplaceWith replaces each element in the set of matched elements with the
+// nodes matched by the given selector.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWith(selector string) *Selection {
+ return s.ReplaceWithMatcher(compileMatcher(selector))
+}
+
+// ReplaceWithMatcher replaces each element in the set of matched elements with
+// the nodes matched by the given Matcher.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWithMatcher(m Matcher) *Selection {
+ return s.ReplaceWithNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// ReplaceWithSelection replaces each element in the set of matched elements with
+// the nodes from the given Selection.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection {
+ return s.ReplaceWithNodes(sel.Nodes...)
+}
+
+// ReplaceWithHtml replaces each element in the set of matched elements with
+// the parsed HTML.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection {
+ s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) {
+ nextSibling := node.NextSibling
+ for _, n := range nodes {
+ if node.Parent != nil {
+ node.Parent.InsertBefore(n, nextSibling)
+ }
+ }
+ })
+ return s.Remove()
+}
+
+// ReplaceWithNodes replaces each element in the set of matched elements with
+// the given nodes.
+// It returns the removed elements.
+//
+// This follows the same rules as Selection.Append.
+func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
+ s.AfterNodes(ns...)
+ return s.Remove()
+}
+
+// SetHtml sets the html content of each element in the selection to
+// specified html string.
+func (s *Selection) SetHtml(htmlStr string) *Selection {
+ for _, context := range s.Nodes {
+ for c := context.FirstChild; c != nil; c = context.FirstChild {
+ context.RemoveChild(c)
+ }
+ }
+ return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) {
+ for _, n := range nodes {
+ node.AppendChild(n)
+ }
+ })
+}
+
+// SetText sets the content of each element in the selection to specified content.
+// The provided text string is escaped.
+func (s *Selection) SetText(text string) *Selection {
+ return s.SetHtml(html.EscapeString(text))
+}
+
+// Unwrap removes the parents of the set of matched elements, leaving the matched
+// elements (and their siblings, if any) in their place.
+// It returns the original selection.
+func (s *Selection) Unwrap() *Selection {
+ s.Parent().Each(func(i int, ss *Selection) {
+		// For some reason, jquery allows unwrap to remove the <head> element, so
+		// allowing it here too. Same for <html>. Why it allows those elements to
+		// be unwrapped while not allowing <body> is a mystery to me.
+ if ss.Nodes[0].Data != "body" {
+ ss.ReplaceWithSelection(ss.Contents())
+ }
+ })
+
+ return s
+}
+
+// Wrap wraps each element in the set of matched elements inside the first
+// element matched by the given selector. The matched child is cloned before
+// being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) Wrap(selector string) *Selection {
+ return s.WrapMatcher(compileMatcher(selector))
+}
+
+// WrapMatcher wraps each element in the set of matched elements inside the
+// first element matched by the given matcher. The matched child is cloned
+// before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapMatcher(m Matcher) *Selection {
+ return s.wrapNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// WrapSelection wraps each element in the set of matched elements inside the
+// first element in the given Selection. The element is cloned before being
+// inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapSelection(sel *Selection) *Selection {
+ return s.wrapNodes(sel.Nodes...)
+}
+
+// WrapHtml wraps each element in the set of matched elements inside the inner-
+// most child of the given HTML.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapHtml(htmlStr string) *Selection {
+ nodesMap := make(map[string][]*html.Node)
+ for _, context := range s.Nodes {
+ var parent *html.Node
+ if context.Parent != nil {
+ parent = context.Parent
+ } else {
+ parent = &html.Node{Type: html.ElementNode}
+ }
+ nodes, found := nodesMap[nodeName(parent)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, parent)
+ nodesMap[nodeName(parent)] = nodes
+ }
+ newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...)
+ }
+ return s
+}
+
+// WrapNode wraps each element in the set of matched elements inside the inner-
+// most child of the given node. The given node is copied before being inserted
+// into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapNode(n *html.Node) *Selection {
+ return s.wrapNodes(n)
+}
+
+func (s *Selection) wrapNodes(ns ...*html.Node) *Selection {
+ s.Each(func(i int, ss *Selection) {
+ ss.wrapAllNodes(ns...)
+ })
+
+ return s
+}
+
+// WrapAll wraps a single HTML structure, matched by the given selector, around
+// all elements in the set of matched elements. The matched child is cloned
+// before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAll(selector string) *Selection {
+ return s.WrapAllMatcher(compileMatcher(selector))
+}
+
+// WrapAllMatcher wraps a single HTML structure, matched by the given Matcher,
+// around all elements in the set of matched elements. The matched child is
+// cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAllMatcher(m Matcher) *Selection {
+ return s.wrapAllNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// WrapAllSelection wraps a single HTML structure, the first node of the given
+// Selection, around all elements in the set of matched elements. The matched
+// child is cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAllSelection(sel *Selection) *Selection {
+ return s.wrapAllNodes(sel.Nodes...)
+}
+
+// WrapAllHtml wraps the given HTML structure around all elements in the set of
+// matched elements. The matched child is cloned before being inserted into the
+// document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAllHtml(htmlStr string) *Selection {
+ var context *html.Node
+ var nodes []*html.Node
+ if len(s.Nodes) > 0 {
+ context = s.Nodes[0]
+ if context.Parent != nil {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ } else {
+ nodes = parseHtml(htmlStr)
+ }
+ }
+ return s.wrapAllNodes(nodes...)
+}
+
+func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection {
+ if len(ns) > 0 {
+ return s.WrapAllNode(ns[0])
+ }
+ return s
+}
+
+// WrapAllNode wraps the given node around the first element in the Selection,
+// making all other nodes in the Selection children of the given node. The node
+// is cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapAllNode(n *html.Node) *Selection {
+ if s.Size() == 0 {
+ return s
+ }
+
+ wrap := cloneNode(n)
+
+ first := s.Nodes[0]
+ if first.Parent != nil {
+ first.Parent.InsertBefore(wrap, first)
+ first.Parent.RemoveChild(first)
+ }
+
+ for c := getFirstChildEl(wrap); c != nil; c = getFirstChildEl(wrap) {
+ wrap = c
+ }
+
+ newSingleSelection(wrap, s.document).AppendSelection(s)
+
+ return s
+}
+
+// WrapInner wraps an HTML structure, matched by the given selector, around the
+// content of element in the set of matched elements. The matched child is
+// cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInner(selector string) *Selection {
+ return s.WrapInnerMatcher(compileMatcher(selector))
+}
+
+// WrapInnerMatcher wraps an HTML structure, matched by the given selector,
+// around the content of element in the set of matched elements. The matched
+// child is cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInnerMatcher(m Matcher) *Selection {
+ return s.wrapInnerNodes(m.MatchAll(s.document.rootNode)...)
+}
+
+// WrapInnerSelection wraps an HTML structure, matched by the given selector,
+// around the content of element in the set of matched elements. The matched
+// child is cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInnerSelection(sel *Selection) *Selection {
+ return s.wrapInnerNodes(sel.Nodes...)
+}
+
+// WrapInnerHtml wraps an HTML structure, matched by the given selector, around
+// the content of element in the set of matched elements. The matched child is
+// cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInnerHtml(htmlStr string) *Selection {
+ nodesMap := make(map[string][]*html.Node)
+ for _, context := range s.Nodes {
+ nodes, found := nodesMap[nodeName(context)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ nodesMap[nodeName(context)] = nodes
+ }
+ newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...)
+ }
+ return s
+}
+
+// WrapInnerNode wraps an HTML structure, matched by the given selector, around
+// the content of element in the set of matched elements. The matched child is
+// cloned before being inserted into the document.
+//
+// It returns the original set of elements.
+func (s *Selection) WrapInnerNode(n *html.Node) *Selection {
+ return s.wrapInnerNodes(n)
+}
+
+func (s *Selection) wrapInnerNodes(ns ...*html.Node) *Selection {
+ if len(ns) == 0 {
+ return s
+ }
+
+ s.Each(func(i int, s *Selection) {
+ contents := s.Contents()
+
+ if contents.Size() > 0 {
+ contents.wrapAllNodes(ns...)
+ } else {
+ s.AppendNodes(cloneNode(ns[0]))
+ }
+ })
+
+ return s
+}
+
+func parseHtml(h string) []*html.Node {
+ // Errors are only returned when the io.Reader returns any error besides
+ // EOF, but strings.Reader never will
+ nodes, err := html.ParseFragment(strings.NewReader(h), &html.Node{Type: html.ElementNode})
+ if err != nil {
+ panic("goquery: failed to parse HTML: " + err.Error())
+ }
+ return nodes
+}
+
+func parseHtmlWithContext(h string, context *html.Node) []*html.Node {
+ // Errors are only returned when the io.Reader returns any error besides
+ // EOF, but strings.Reader never will
+ nodes, err := html.ParseFragment(strings.NewReader(h), context)
+ if err != nil {
+ panic("goquery: failed to parse HTML: " + err.Error())
+ }
+ return nodes
+}
+
+// Get the first child that is an ElementNode
+func getFirstChildEl(n *html.Node) *html.Node {
+ c := n.FirstChild
+ for c != nil && c.Type != html.ElementNode {
+ c = c.NextSibling
+ }
+ return c
+}
+
+// Deep copy a slice of nodes.
+func cloneNodes(ns []*html.Node) []*html.Node {
+ cns := make([]*html.Node, 0, len(ns))
+
+ for _, n := range ns {
+ cns = append(cns, cloneNode(n))
+ }
+
+ return cns
+}
+
+// Deep copy a node. The new node has clones of all the original node's
+// children but none of its parents or siblings.
+func cloneNode(n *html.Node) *html.Node {
+ nn := &html.Node{
+ Type: n.Type,
+ DataAtom: n.DataAtom,
+ Data: n.Data,
+ Attr: make([]html.Attribute, len(n.Attr)),
+ }
+
+ copy(nn.Attr, n.Attr)
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ nn.AppendChild(cloneNode(c))
+ }
+
+ return nn
+}
+
+func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool,
+ f func(sn *html.Node, n *html.Node)) *Selection {
+
+ lasti := s.Size() - 1
+
+ // net.Html doesn't provide document fragments for insertion, so to get
+ // things in the correct order with After() and Prepend(), the callback
+ // needs to be called on the reverse of the nodes.
+ if reverse {
+ for i, j := 0, len(ns)-1; i < j; i, j = i+1, j-1 {
+ ns[i], ns[j] = ns[j], ns[i]
+ }
+ }
+
+ for i, sn := range s.Nodes {
+ for _, n := range ns {
+ if i != lasti {
+ f(sn, cloneNode(n))
+ } else {
+ if n.Parent != nil {
+ n.Parent.RemoveChild(n)
+ }
+ f(sn, n)
+ }
+ }
+ }
+
+ return s
+}
+
+// eachNodeHtml parses the given html string and inserts the resulting nodes in the dom with the mergeFn.
+// The parsed nodes are inserted for each element of the selection.
+// isParent can be used to indicate that the elements of the selection should be treated as the parent for the parsed html.
+// A cache is used to avoid parsing the html multiple times should the elements of the selection result in the same context.
+func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection {
+ // cache to avoid parsing the html for the same context multiple times
+ nodeCache := make(map[string][]*html.Node)
+ var context *html.Node
+ for _, n := range s.Nodes {
+ if isParent {
+ context = n.Parent
+ } else {
+ if n.Type != html.ElementNode {
+ continue
+ }
+ context = n
+ }
+ if context != nil {
+ nodes, found := nodeCache[nodeName(context)]
+ if !found {
+ nodes = parseHtmlWithContext(htmlStr, context)
+ nodeCache[nodeName(context)] = nodes
+ }
+ mergeFn(n, cloneNodes(nodes))
+ }
+ }
+ return s
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/property.go b/vendor/github.com/PuerkitoBio/goquery/property.go
new file mode 100644
index 00000000..411126db
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/property.go
@@ -0,0 +1,275 @@
+package goquery
+
+import (
+ "bytes"
+ "regexp"
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+var rxClassTrim = regexp.MustCompile("[\t\r\n]")
+
+// Attr gets the specified attribute's value for the first element in the
+// Selection. To get the value for each element individually, use a looping
+// construct such as Each or Map method.
+func (s *Selection) Attr(attrName string) (val string, exists bool) {
+ if len(s.Nodes) == 0 {
+ return
+ }
+ return getAttributeValue(attrName, s.Nodes[0])
+}
+
+// AttrOr works like Attr but returns default value if attribute is not present.
+func (s *Selection) AttrOr(attrName, defaultValue string) string {
+ if len(s.Nodes) == 0 {
+ return defaultValue
+ }
+
+ val, exists := getAttributeValue(attrName, s.Nodes[0])
+ if !exists {
+ return defaultValue
+ }
+
+ return val
+}
+
+// RemoveAttr removes the named attribute from each element in the set of matched elements.
+func (s *Selection) RemoveAttr(attrName string) *Selection {
+ for _, n := range s.Nodes {
+ removeAttr(n, attrName)
+ }
+
+ return s
+}
+
+// SetAttr sets the given attribute on each element in the set of matched elements.
+func (s *Selection) SetAttr(attrName, val string) *Selection {
+ for _, n := range s.Nodes {
+ attr := getAttributePtr(attrName, n)
+ if attr == nil {
+ n.Attr = append(n.Attr, html.Attribute{Key: attrName, Val: val})
+ } else {
+ attr.Val = val
+ }
+ }
+
+ return s
+}
+
+// Text gets the combined text contents of each element in the set of matched
+// elements, including their descendants.
+func (s *Selection) Text() string {
+ var buf bytes.Buffer
+
+ // Slightly optimized vs calling Each: no single selection object created
+ var f func(*html.Node)
+ f = func(n *html.Node) {
+ if n.Type == html.TextNode {
+ // Keep newlines and spaces, like jQuery
+ buf.WriteString(n.Data)
+ }
+ if n.FirstChild != nil {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ f(c)
+ }
+ }
+ }
+ for _, n := range s.Nodes {
+ f(n)
+ }
+
+ return buf.String()
+}
+
+// Size is an alias for Length.
+func (s *Selection) Size() int {
+ return s.Length()
+}
+
+// Length returns the number of elements in the Selection object.
+func (s *Selection) Length() int {
+ return len(s.Nodes)
+}
+
+// Html gets the HTML contents of the first element in the set of matched
+// elements. It includes text and comment nodes.
+func (s *Selection) Html() (ret string, e error) {
+ // Since there is no .innerHtml, the HTML content must be re-created from
+ // the nodes using html.Render.
+ var buf bytes.Buffer
+
+ if len(s.Nodes) > 0 {
+ for c := s.Nodes[0].FirstChild; c != nil; c = c.NextSibling {
+ e = html.Render(&buf, c)
+ if e != nil {
+ return
+ }
+ }
+ ret = buf.String()
+ }
+
+ return
+}
+
+// AddClass adds the given class(es) to each element in the set of matched elements.
+// Multiple class names can be specified, separated by a space or via multiple arguments.
+func (s *Selection) AddClass(class ...string) *Selection {
+ classStr := strings.TrimSpace(strings.Join(class, " "))
+
+ if classStr == "" {
+ return s
+ }
+
+ tcls := getClassesSlice(classStr)
+ for _, n := range s.Nodes {
+ curClasses, attr := getClassesAndAttr(n, true)
+ for _, newClass := range tcls {
+ if !strings.Contains(curClasses, " "+newClass+" ") {
+ curClasses += newClass + " "
+ }
+ }
+
+ setClasses(n, attr, curClasses)
+ }
+
+ return s
+}
+
+// HasClass determines whether any of the matched elements are assigned the
+// given class.
+func (s *Selection) HasClass(class string) bool {
+ class = " " + class + " "
+ for _, n := range s.Nodes {
+ classes, _ := getClassesAndAttr(n, false)
+ if strings.Contains(classes, class) {
+ return true
+ }
+ }
+ return false
+}
+
+// RemoveClass removes the given class(es) from each element in the set of matched elements.
+// Multiple class names can be specified, separated by a space or via multiple arguments.
+// If no class name is provided, all classes are removed.
+func (s *Selection) RemoveClass(class ...string) *Selection {
+ var rclasses []string
+
+ classStr := strings.TrimSpace(strings.Join(class, " "))
+ remove := classStr == ""
+
+ if !remove {
+ rclasses = getClassesSlice(classStr)
+ }
+
+ for _, n := range s.Nodes {
+ if remove {
+ removeAttr(n, "class")
+ } else {
+ classes, attr := getClassesAndAttr(n, true)
+ for _, rcl := range rclasses {
+ classes = strings.Replace(classes, " "+rcl+" ", " ", -1)
+ }
+
+ setClasses(n, attr, classes)
+ }
+ }
+
+ return s
+}
+
+// ToggleClass adds or removes the given class(es) for each element in the set of matched elements.
+// Multiple class names can be specified, separated by a space or via multiple arguments.
+func (s *Selection) ToggleClass(class ...string) *Selection {
+ classStr := strings.TrimSpace(strings.Join(class, " "))
+
+ if classStr == "" {
+ return s
+ }
+
+ tcls := getClassesSlice(classStr)
+
+ for _, n := range s.Nodes {
+ classes, attr := getClassesAndAttr(n, true)
+ for _, tcl := range tcls {
+ if strings.Contains(classes, " "+tcl+" ") {
+ classes = strings.Replace(classes, " "+tcl+" ", " ", -1)
+ } else {
+ classes += tcl + " "
+ }
+ }
+
+ setClasses(n, attr, classes)
+ }
+
+ return s
+}
+
+func getAttributePtr(attrName string, n *html.Node) *html.Attribute {
+ if n == nil {
+ return nil
+ }
+
+ for i, a := range n.Attr {
+ if a.Key == attrName {
+ return &n.Attr[i]
+ }
+ }
+ return nil
+}
+
+// Private function to get the specified attribute's value from a node.
+func getAttributeValue(attrName string, n *html.Node) (val string, exists bool) {
+ if a := getAttributePtr(attrName, n); a != nil {
+ val = a.Val
+ exists = true
+ }
+ return
+}
+
+// Get and normalize the "class" attribute from the node.
+func getClassesAndAttr(n *html.Node, create bool) (classes string, attr *html.Attribute) {
+ // Applies only to element nodes
+ if n.Type == html.ElementNode {
+ attr = getAttributePtr("class", n)
+ if attr == nil && create {
+ n.Attr = append(n.Attr, html.Attribute{
+ Key: "class",
+ Val: "",
+ })
+ attr = &n.Attr[len(n.Attr)-1]
+ }
+ }
+
+ if attr == nil {
+ classes = " "
+ } else {
+ classes = rxClassTrim.ReplaceAllString(" "+attr.Val+" ", " ")
+ }
+
+ return
+}
+
+func getClassesSlice(classes string) []string {
+ return strings.Split(rxClassTrim.ReplaceAllString(" "+classes+" ", " "), " ")
+}
+
+func removeAttr(n *html.Node, attrName string) {
+ for i, a := range n.Attr {
+ if a.Key == attrName {
+ n.Attr[i], n.Attr[len(n.Attr)-1], n.Attr =
+ n.Attr[len(n.Attr)-1], html.Attribute{}, n.Attr[:len(n.Attr)-1]
+ return
+ }
+ }
+}
+
+func setClasses(n *html.Node, attr *html.Attribute, classes string) {
+ classes = strings.TrimSpace(classes)
+ if classes == "" {
+ removeAttr(n, "class")
+ return
+ }
+
+ attr.Val = classes
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/query.go b/vendor/github.com/PuerkitoBio/goquery/query.go
new file mode 100644
index 00000000..fe86bf0b
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/query.go
@@ -0,0 +1,49 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+// Is checks the current matched set of elements against a selector and
+// returns true if at least one of these elements matches.
+func (s *Selection) Is(selector string) bool {
+	return s.IsMatcher(compileMatcher(selector))
+}
+
+// IsMatcher checks the current matched set of elements against a matcher and
+// returns true if at least one of these elements matches.
+func (s *Selection) IsMatcher(m Matcher) bool {
+	if len(s.Nodes) > 0 {
+		if len(s.Nodes) == 1 {
+			// Fast path: a single node can be tested directly,
+			// without allocating a filtered slice.
+			return m.Match(s.Nodes[0])
+		}
+		return len(m.Filter(s.Nodes)) > 0
+	}
+
+	return false
+}
+
+// IsFunction checks the current matched set of elements against a predicate and
+// returns true if at least one of these elements matches.
+func (s *Selection) IsFunction(f func(int, *Selection) bool) bool {
+	return s.FilterFunction(f).Length() > 0
+}
+
+// IsSelection checks the current matched set of elements against a Selection object
+// and returns true if at least one of these elements matches.
+func (s *Selection) IsSelection(sel *Selection) bool {
+	return s.FilterSelection(sel).Length() > 0
+}
+
+// IsNodes checks the current matched set of elements against the specified nodes
+// and returns true if at least one of these elements matches.
+func (s *Selection) IsNodes(nodes ...*html.Node) bool {
+	return s.FilterNodes(nodes...).Length() > 0
+}
+
+// Contains returns true if the specified Node is within,
+// at any depth, one of the nodes in the Selection object.
+// It is NOT inclusive, to behave like jQuery's implementation, and
+// unlike Javascript's .contains, so if the contained
+// node is itself in the selection, it returns false.
+func (s *Selection) Contains(n *html.Node) bool {
+	return sliceContains(s.Nodes, n)
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/traversal.go b/vendor/github.com/PuerkitoBio/goquery/traversal.go
new file mode 100644
index 00000000..5fa5315a
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/traversal.go
@@ -0,0 +1,698 @@
+package goquery
+
+import "golang.org/x/net/html"
+
+type siblingType int
+
+// Sibling type, used internally when iterating over children at the same
+// level (siblings) to specify which nodes are requested.
+// The iota offset places siblingAll at zero, with the "prev" variants
+// taking negative values and the "next" variants positive ones.
+const (
+	siblingPrevUntil siblingType = iota - 3
+	siblingPrevAll
+	siblingPrev
+	siblingAll
+	siblingNext
+	siblingNextAll
+	siblingNextUntil
+	siblingAllIncludingNonElements
+)
+
+// Find gets the descendants of each element in the current set of matched
+// elements, filtered by a selector. It returns a new Selection object
+// containing these matched elements.
+func (s *Selection) Find(selector string) *Selection {
+	return pushStack(s, findWithMatcher(s.Nodes, compileMatcher(selector)))
+}
+
+// FindMatcher gets the descendants of each element in the current set of matched
+// elements, filtered by the matcher. It returns a new Selection object
+// containing these matched elements.
+func (s *Selection) FindMatcher(m Matcher) *Selection {
+	return pushStack(s, findWithMatcher(s.Nodes, m))
+}
+
+// FindSelection gets the descendants of each element in the current
+// Selection, filtered by a Selection. It returns a new Selection object
+// containing these matched elements.
+func (s *Selection) FindSelection(sel *Selection) *Selection {
+	if sel == nil {
+		// A nil Selection matches nothing; push an empty result.
+		return pushStack(s, nil)
+	}
+	return s.FindNodes(sel.Nodes...)
+}
+
+// FindNodes gets the descendants of each element in the current
+// Selection, filtered by some nodes. It returns a new Selection object
+// containing these matched elements.
+func (s *Selection) FindNodes(nodes ...*html.Node) *Selection {
+	return pushStack(s, mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
+		// Keep only the candidate nodes contained in the current selection.
+		if sliceContains(s.Nodes, n) {
+			return []*html.Node{n}
+		}
+		return nil
+	}))
+}
+
+// Contents gets the children of each element in the Selection,
+// including text and comment nodes. It returns a new Selection object
+// containing these elements.
+func (s *Selection) Contents() *Selection {
+	return pushStack(s, getChildrenNodes(s.Nodes, siblingAllIncludingNonElements))
+}
+
+// ContentsFiltered gets the children of each element in the Selection,
+// filtered by the specified selector. It returns a new Selection
+// object containing these elements. Since selectors only act on Element nodes,
+// this function is an alias to ChildrenFiltered unless the selector is empty,
+// in which case it is an alias to Contents.
+func (s *Selection) ContentsFiltered(selector string) *Selection {
+	if selector != "" {
+		return s.ChildrenFiltered(selector)
+	}
+	return s.Contents()
+}
+
+// ContentsMatcher gets the children of each element in the Selection,
+// filtered by the specified matcher. It returns a new Selection
+// object containing these elements. Since matchers only act on Element nodes,
+// this function is an alias to ChildrenMatcher.
+func (s *Selection) ContentsMatcher(m Matcher) *Selection {
+	return s.ChildrenMatcher(m)
+}
+
+// Children gets the child elements of each element in the Selection.
+// It returns a new Selection object containing these elements.
+func (s *Selection) Children() *Selection {
+	return pushStack(s, getChildrenNodes(s.Nodes, siblingAll))
+}
+
+// ChildrenFiltered gets the child elements of each element in the Selection,
+// filtered by the specified selector. It returns a new
+// Selection object containing these elements.
+func (s *Selection) ChildrenFiltered(selector string) *Selection {
+	return filterAndPush(s, getChildrenNodes(s.Nodes, siblingAll), compileMatcher(selector))
+}
+
+// ChildrenMatcher gets the child elements of each element in the Selection,
+// filtered by the specified matcher. It returns a new
+// Selection object containing these elements.
+func (s *Selection) ChildrenMatcher(m Matcher) *Selection {
+	return filterAndPush(s, getChildrenNodes(s.Nodes, siblingAll), m)
+}
+
+// Parent gets the parent of each element in the Selection. It returns a
+// new Selection object containing the matched elements.
+func (s *Selection) Parent() *Selection {
+	return pushStack(s, getParentNodes(s.Nodes))
+}
+
+// ParentFiltered gets the parent of each element in the Selection filtered by a
+// selector. It returns a new Selection object containing the matched elements.
+func (s *Selection) ParentFiltered(selector string) *Selection {
+	return filterAndPush(s, getParentNodes(s.Nodes), compileMatcher(selector))
+}
+
+// ParentMatcher gets the parent of each element in the Selection filtered by a
+// matcher. It returns a new Selection object containing the matched elements.
+func (s *Selection) ParentMatcher(m Matcher) *Selection {
+	return filterAndPush(s, getParentNodes(s.Nodes), m)
+}
+
+// Closest gets the first element that matches the selector by testing the
+// element itself and traversing up through its ancestors in the DOM tree.
+func (s *Selection) Closest(selector string) *Selection {
+	cs := compileMatcher(selector)
+	return s.ClosestMatcher(cs)
+}
+
+// ClosestMatcher gets the first element that matches the matcher by testing the
+// element itself and traversing up through its ancestors in the DOM tree.
+func (s *Selection) ClosestMatcher(m Matcher) *Selection {
+	return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node {
+		// For each node in the selection, test the node itself, then each parent
+		// until a match is found.
+		for ; n != nil; n = n.Parent {
+			if m.Match(n) {
+				return []*html.Node{n}
+			}
+		}
+		return nil
+	}))
+}
+
+// ClosestNodes gets the first element that matches one of the nodes by testing the
+// element itself and traversing up through its ancestors in the DOM tree.
+func (s *Selection) ClosestNodes(nodes ...*html.Node) *Selection {
+	// Build a set of candidate nodes for constant-time lookups while
+	// walking up the ancestor chain.
+	set := make(map[*html.Node]bool)
+	for _, n := range nodes {
+		set[n] = true
+	}
+	return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node {
+		// For each node in the selection, test the node itself, then each parent
+		// until a match is found.
+		for ; n != nil; n = n.Parent {
+			if set[n] {
+				return []*html.Node{n}
+			}
+		}
+		return nil
+	}))
+}
+
+// ClosestSelection gets the first element that matches one of the nodes in the
+// Selection by testing the element itself and traversing up through its ancestors
+// in the DOM tree.
+func (s *Selection) ClosestSelection(sel *Selection) *Selection {
+	if sel == nil {
+		// A nil Selection matches nothing; push an empty result.
+		return pushStack(s, nil)
+	}
+	return s.ClosestNodes(sel.Nodes...)
+}
+
+// Parents gets the ancestors of each element in the current Selection. It
+// returns a new Selection object with the matched elements.
+func (s *Selection) Parents() *Selection {
+	return pushStack(s, getParentsNodes(s.Nodes, nil, nil))
+}
+
+// ParentsFiltered gets the ancestors of each element in the current
+// Selection. It returns a new Selection object with the matched elements.
+func (s *Selection) ParentsFiltered(selector string) *Selection {
+	return filterAndPush(s, getParentsNodes(s.Nodes, nil, nil), compileMatcher(selector))
+}
+
+// ParentsMatcher gets the ancestors of each element in the current
+// Selection. It returns a new Selection object with the matched elements.
+func (s *Selection) ParentsMatcher(m Matcher) *Selection {
+	return filterAndPush(s, getParentsNodes(s.Nodes, nil, nil), m)
+}
+
+// ParentsUntil gets the ancestors of each element in the Selection, up to but
+// not including the element matched by the selector. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) ParentsUntil(selector string) *Selection {
+	return pushStack(s, getParentsNodes(s.Nodes, compileMatcher(selector), nil))
+}
+
+// ParentsUntilMatcher gets the ancestors of each element in the Selection, up to but
+// not including the element matched by the matcher. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) ParentsUntilMatcher(m Matcher) *Selection {
+	return pushStack(s, getParentsNodes(s.Nodes, m, nil))
+}
+
+// ParentsUntilSelection gets the ancestors of each element in the Selection,
+// up to but not including the elements in the specified Selection. It returns a
+// new Selection object containing the matched elements.
+func (s *Selection) ParentsUntilSelection(sel *Selection) *Selection {
+	if sel == nil {
+		// With no until-Selection, this degenerates to all ancestors.
+		return s.Parents()
+	}
+	return s.ParentsUntilNodes(sel.Nodes...)
+}
+
+// ParentsUntilNodes gets the ancestors of each element in the Selection,
+// up to but not including the specified nodes. It returns a
+// new Selection object containing the matched elements.
+func (s *Selection) ParentsUntilNodes(nodes ...*html.Node) *Selection {
+	return pushStack(s, getParentsNodes(s.Nodes, nil, nodes))
+}
+
+// ParentsFilteredUntil is like ParentsUntil, with the option to filter the
+// results based on a selector string. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) ParentsFilteredUntil(filterSelector, untilSelector string) *Selection {
+	return filterAndPush(s, getParentsNodes(s.Nodes, compileMatcher(untilSelector), nil), compileMatcher(filterSelector))
+}
+
+// ParentsFilteredUntilMatcher is like ParentsUntilMatcher, with the option to filter the
+// results based on a matcher. It returns a new Selection object containing the matched elements.
+func (s *Selection) ParentsFilteredUntilMatcher(filter, until Matcher) *Selection {
+	return filterAndPush(s, getParentsNodes(s.Nodes, until, nil), filter)
+}
+
+// ParentsFilteredUntilSelection is like ParentsUntilSelection, with the
+// option to filter the results based on a selector string. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) ParentsFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
+	return s.ParentsMatcherUntilSelection(compileMatcher(filterSelector), sel)
+}
+
+// ParentsMatcherUntilSelection is like ParentsUntilSelection, with the
+// option to filter the results based on a matcher. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) ParentsMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
+	if sel == nil {
+		return s.ParentsMatcher(filter)
+	}
+	return s.ParentsMatcherUntilNodes(filter, sel.Nodes...)
+}
+
+// ParentsFilteredUntilNodes is like ParentsUntilNodes, with the
+// option to filter the results based on a selector string. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) ParentsFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
+	return filterAndPush(s, getParentsNodes(s.Nodes, nil, nodes), compileMatcher(filterSelector))
+}
+
+// ParentsMatcherUntilNodes is like ParentsUntilNodes, with the
+// option to filter the results based on a matcher. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) ParentsMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
+	return filterAndPush(s, getParentsNodes(s.Nodes, nil, nodes), filter)
+}
+
+// Siblings gets the siblings of each element in the Selection. It returns
+// a new Selection object containing the matched elements.
+func (s *Selection) Siblings() *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil))
+}
+
+// SiblingsFiltered gets the siblings of each element in the Selection
+// filtered by a selector. It returns a new Selection object containing the
+// matched elements.
+func (s *Selection) SiblingsFiltered(selector string) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil), compileMatcher(selector))
+}
+
+// SiblingsMatcher gets the siblings of each element in the Selection
+// filtered by a matcher. It returns a new Selection object containing the
+// matched elements.
+func (s *Selection) SiblingsMatcher(m Matcher) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil), m)
+}
+
+// Next gets the immediately following sibling of each element in the
+// Selection (at most one node per matched element). It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) Next() *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil))
+}
+
+// NextFiltered gets the immediately following sibling of each element in the
+// Selection filtered by a selector. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) NextFiltered(selector string) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil), compileMatcher(selector))
+}
+
+// NextMatcher gets the immediately following sibling of each element in the
+// Selection filtered by a matcher. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) NextMatcher(m Matcher) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil), m)
+}
+
+// NextAll gets all the following siblings of each element in the
+// Selection. It returns a new Selection object containing the matched elements.
+func (s *Selection) NextAll() *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil))
+}
+
+// NextAllFiltered gets all the following siblings of each element in the
+// Selection filtered by a selector. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) NextAllFiltered(selector string) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil), compileMatcher(selector))
+}
+
+// NextAllMatcher gets all the following siblings of each element in the
+// Selection filtered by a matcher. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) NextAllMatcher(m Matcher) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil), m)
+}
+
+// Prev gets the immediately preceding sibling of each element in the
+// Selection (at most one node per matched element). It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) Prev() *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil))
+}
+
+// PrevFiltered gets the immediately preceding sibling of each element in the
+// Selection filtered by a selector. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) PrevFiltered(selector string) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil), compileMatcher(selector))
+}
+
+// PrevMatcher gets the immediately preceding sibling of each element in the
+// Selection filtered by a matcher. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) PrevMatcher(m Matcher) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil), m)
+}
+
+// PrevAll gets all the preceding siblings of each element in the
+// Selection. It returns a new Selection object containing the matched elements.
+func (s *Selection) PrevAll() *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil))
+}
+
+// PrevAllFiltered gets all the preceding siblings of each element in the
+// Selection filtered by a selector. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) PrevAllFiltered(selector string) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil), compileMatcher(selector))
+}
+
+// PrevAllMatcher gets all the preceding siblings of each element in the
+// Selection filtered by a matcher. It returns a new Selection object
+// containing the matched elements.
+func (s *Selection) PrevAllMatcher(m Matcher) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil), m)
+}
+
+// NextUntil gets all following siblings of each element up to but not
+// including the element matched by the selector. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) NextUntil(selector string) *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil,
+		compileMatcher(selector), nil))
+}
+
+// NextUntilMatcher gets all following siblings of each element up to but not
+// including the element matched by the matcher. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) NextUntilMatcher(m Matcher) *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil,
+		m, nil))
+}
+
+// NextUntilSelection gets all following siblings of each element up to but not
+// including the element matched by the Selection. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) NextUntilSelection(sel *Selection) *Selection {
+	if sel == nil {
+		// With no until-Selection, this degenerates to all following siblings.
+		return s.NextAll()
+	}
+	return s.NextUntilNodes(sel.Nodes...)
+}
+
+// NextUntilNodes gets all following siblings of each element up to but not
+// including the element matched by the nodes. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) NextUntilNodes(nodes ...*html.Node) *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil,
+		nil, nodes))
+}
+
+// PrevUntil gets all preceding siblings of each element up to but not
+// including the element matched by the selector. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) PrevUntil(selector string) *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
+		compileMatcher(selector), nil))
+}
+
+// PrevUntilMatcher gets all preceding siblings of each element up to but not
+// including the element matched by the matcher. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) PrevUntilMatcher(m Matcher) *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
+		m, nil))
+}
+
+// PrevUntilSelection gets all preceding siblings of each element up to but not
+// including the element matched by the Selection. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) PrevUntilSelection(sel *Selection) *Selection {
+	if sel == nil {
+		// With no until-Selection, this degenerates to all preceding siblings.
+		return s.PrevAll()
+	}
+	return s.PrevUntilNodes(sel.Nodes...)
+}
+
+// PrevUntilNodes gets all preceding siblings of each element up to but not
+// including the element matched by the nodes. It returns a new Selection
+// object containing the matched elements.
+func (s *Selection) PrevUntilNodes(nodes ...*html.Node) *Selection {
+	return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
+		nil, nodes))
+}
+
+// NextFilteredUntil is like NextUntil, with the option to filter
+// the results based on a selector string.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) NextFilteredUntil(filterSelector, untilSelector string) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil,
+		compileMatcher(untilSelector), nil), compileMatcher(filterSelector))
+}
+
+// NextFilteredUntilMatcher is like NextUntilMatcher, with the option to filter
+// the results based on a matcher.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) NextFilteredUntilMatcher(filter, until Matcher) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil,
+		until, nil), filter)
+}
+
+// NextFilteredUntilSelection is like NextUntilSelection, with the
+// option to filter the results based on a selector string. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) NextFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
+	return s.NextMatcherUntilSelection(compileMatcher(filterSelector), sel)
+}
+
+// NextMatcherUntilSelection is like NextUntilSelection, with the
+// option to filter the results based on a matcher. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) NextMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
+	if sel == nil {
+		return s.NextMatcher(filter)
+	}
+	return s.NextMatcherUntilNodes(filter, sel.Nodes...)
+}
+
+// NextFilteredUntilNodes is like NextUntilNodes, with the
+// option to filter the results based on a selector string. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) NextFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil,
+		nil, nodes), compileMatcher(filterSelector))
+}
+
+// NextMatcherUntilNodes is like NextUntilNodes, with the
+// option to filter the results based on a matcher. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) NextMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil,
+		nil, nodes), filter)
+}
+
+// PrevFilteredUntil is like PrevUntil, with the option to filter
+// the results based on a selector string.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) PrevFilteredUntil(filterSelector, untilSelector string) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
+		compileMatcher(untilSelector), nil), compileMatcher(filterSelector))
+}
+
+// PrevFilteredUntilMatcher is like PrevUntilMatcher, with the option to filter
+// the results based on a matcher.
+// It returns a new Selection object containing the matched elements.
+func (s *Selection) PrevFilteredUntilMatcher(filter, until Matcher) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
+		until, nil), filter)
+}
+
+// PrevFilteredUntilSelection is like PrevUntilSelection, with the
+// option to filter the results based on a selector string. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) PrevFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
+	return s.PrevMatcherUntilSelection(compileMatcher(filterSelector), sel)
+}
+
+// PrevMatcherUntilSelection is like PrevUntilSelection, with the
+// option to filter the results based on a matcher. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) PrevMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
+	if sel == nil {
+		return s.PrevMatcher(filter)
+	}
+	return s.PrevMatcherUntilNodes(filter, sel.Nodes...)
+}
+
+// PrevFilteredUntilNodes is like PrevUntilNodes, with the
+// option to filter the results based on a selector string. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) PrevFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
+		nil, nodes), compileMatcher(filterSelector))
+}
+
+// PrevMatcherUntilNodes is like PrevUntilNodes, with the
+// option to filter the results based on a matcher. It returns a new
+// Selection object containing the matched elements.
+func (s *Selection) PrevMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
+	return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
+		nil, nodes), filter)
+}
+
+// filterAndPush filters the nodes based on a matcher, and pushes the results
+// on the stack, with the srcSel as previous selection.
+func filterAndPush(srcSel *Selection, nodes []*html.Node, m Matcher) *Selection {
+	// Create a temporary Selection with the specified nodes to filter using winnow
+	sel := &Selection{nodes, srcSel.document, nil}
+	// Filter based on matcher and push on stack
+	return pushStack(srcSel, winnow(sel, m, true))
+}
+
+// Internal implementation of Find that return raw nodes.
+func findWithMatcher(nodes []*html.Node, m Matcher) []*html.Node {
+	// Map nodes to find the matches within the children of each node
+	return mapNodes(nodes, func(i int, n *html.Node) (result []*html.Node) {
+		// Go down one level, because jQuery's Find selects only within descendants
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			if c.Type == html.ElementNode {
+				result = append(result, m.MatchAll(c)...)
+			}
+		}
+		return
+	})
+}
+
+// Internal implementation to get all parent nodes, stopping at the specified
+// node (or nil if no stop). The stop condition is tested before appending,
+// so the stopping ancestor itself is excluded from the result.
+func getParentsNodes(nodes []*html.Node, stopm Matcher, stopNodes []*html.Node) []*html.Node {
+	return mapNodes(nodes, func(i int, n *html.Node) (result []*html.Node) {
+		for p := n.Parent; p != nil; p = p.Parent {
+			sel := newSingleSelection(p, nil)
+			if stopm != nil {
+				if sel.IsMatcher(stopm) {
+					break
+				}
+			} else if len(stopNodes) > 0 {
+				if sel.IsNodes(stopNodes...) {
+					break
+				}
+			}
+			if p.Type == html.ElementNode {
+				result = append(result, p)
+			}
+		}
+		return
+	})
+}
+
+// Internal implementation of sibling nodes that return a raw slice of matches.
+func getSiblingNodes(nodes []*html.Node, st siblingType, untilm Matcher, untilNodes []*html.Node) []*html.Node {
+	var f func(*html.Node) bool
+
+	// If the requested siblings are ...Until, create the test function to
+	// determine if the until condition is reached (returns true if it is).
+	// For all other sibling types, f stays nil and is never invoked.
+	if st == siblingNextUntil || st == siblingPrevUntil {
+		f = func(n *html.Node) bool {
+			if untilm != nil {
+				// Matcher-based condition
+				sel := newSingleSelection(n, nil)
+				return sel.IsMatcher(untilm)
+			} else if len(untilNodes) > 0 {
+				// Nodes-based condition
+				sel := newSingleSelection(n, nil)
+				return sel.IsNodes(untilNodes...)
+			}
+			return false
+		}
+	}
+
+	// Each node acts as the reference (skip) node among its parent's children.
+	return mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
+		return getChildrenWithSiblingType(n.Parent, st, n, f)
+	})
+}
+
+// Gets the children nodes of each node in the specified slice of nodes,
+// based on the sibling type request. No skip node or until function is
+// used here, so this only makes sense for the "all" sibling types.
+func getChildrenNodes(nodes []*html.Node, st siblingType) []*html.Node {
+	return mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
+		return getChildrenWithSiblingType(n, st, nil, nil)
+	})
+}
+
+// Gets the children of the specified parent, based on the requested sibling
+// type, skipping a specified node if required.
+// NOTE(review): the Prev/Next sibling types dereference skipNode on the
+// first iteration, so callers must supply a non-nil skipNode for those
+// types (getSiblingNodes does; getChildrenNodes uses only the "all" types).
+func getChildrenWithSiblingType(parent *html.Node, st siblingType, skipNode *html.Node,
+	untilFunc func(*html.Node) bool) (result []*html.Node) {
+
+	// Create the iterator function. iter returns the next candidate after
+	// cur, or nil when exhausted; the first call passes cur == nil.
+	var iter = func(cur *html.Node) (ret *html.Node) {
+		// Based on the sibling type requested, iterate the right way
+		for {
+			switch st {
+			case siblingAll, siblingAllIncludingNonElements:
+				if cur == nil {
+					// First iteration, start with first child of parent
+					// Skip node if required
+					if ret = parent.FirstChild; ret == skipNode && skipNode != nil {
+						ret = skipNode.NextSibling
+					}
+				} else {
+					// Skip node if required
+					if ret = cur.NextSibling; ret == skipNode && skipNode != nil {
+						ret = skipNode.NextSibling
+					}
+				}
+			case siblingPrev, siblingPrevAll, siblingPrevUntil:
+				if cur == nil {
+					// Start with previous sibling of the skip node
+					ret = skipNode.PrevSibling
+				} else {
+					ret = cur.PrevSibling
+				}
+			case siblingNext, siblingNextAll, siblingNextUntil:
+				if cur == nil {
+					// Start with next sibling of the skip node
+					ret = skipNode.NextSibling
+				} else {
+					ret = cur.NextSibling
+				}
+			default:
+				panic("Invalid sibling type.")
+			}
+			if ret == nil || ret.Type == html.ElementNode || st == siblingAllIncludingNonElements {
+				return
+			}
+			// Not a valid node, try again from this one
+			cur = ret
+		}
+	}
+
+	for c := iter(nil); c != nil; c = iter(c) {
+		// If this is an ...Until case, test before append (returns true
+		// if the until condition is reached)
+		if st == siblingNextUntil || st == siblingPrevUntil {
+			if untilFunc(c) {
+				return
+			}
+		}
+		result = append(result, c)
+		if st == siblingNext || st == siblingPrev {
+			// Only one node was requested (immediate next or previous), so exit
+			return
+		}
+	}
+	return
+}
+
+// Internal implementation of parent nodes that return a raw slice of Nodes.
+// Non-element parents (e.g. the document node) are excluded.
+func getParentNodes(nodes []*html.Node) []*html.Node {
+	return mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
+		if n.Parent != nil && n.Parent.Type == html.ElementNode {
+			return []*html.Node{n.Parent}
+		}
+		return nil
+	})
+}
+
+// Internal map function used by many traversing methods. Takes the source nodes
+// to iterate on and the mapping function that returns an array of nodes.
+// Returns an array of nodes mapped by calling the callback function once for
+// each node in the source nodes. The set tracks nodes already appended so
+// each node appears at most once in the result.
+func mapNodes(nodes []*html.Node, f func(int, *html.Node) []*html.Node) (result []*html.Node) {
+	set := make(map[*html.Node]bool)
+	for i, n := range nodes {
+		if vals := f(i, n); len(vals) > 0 {
+			result = appendWithoutDuplicates(result, vals, set)
+		}
+	}
+	return result
+}
diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go
new file mode 100644
index 00000000..6646c143
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/type.go
@@ -0,0 +1,203 @@
+package goquery
+
+import (
+ "errors"
+ "io"
+ "net/http"
+ "net/url"
+
+ "github.com/andybalholm/cascadia"
+ "golang.org/x/net/html"
+)
+
+// Document represents an HTML document to be manipulated. Unlike jQuery, which
+// is loaded as part of a DOM document, and thus acts upon its containing
+// document, GoQuery doesn't know which HTML document to act upon. So it needs
+// to be told, and that's what the Document class is for. It holds the root
+// document node to manipulate, and can make selections on this document.
+type Document struct {
+ *Selection
+ Url *url.URL
+ rootNode *html.Node
+}
+
+// NewDocumentFromNode is a Document constructor that takes a root html Node
+// as argument.
+func NewDocumentFromNode(root *html.Node) *Document {
+ return newDocument(root, nil)
+}
+
+// NewDocument is a Document constructor that takes a string URL as argument.
+// It loads the specified document, parses it, and stores the root Document
+// node, ready to be manipulated.
+//
+// Deprecated: Use the net/http standard library package to make the request
+// and validate the response before calling goquery.NewDocumentFromReader
+// with the response's body.
+func NewDocument(url string) (*Document, error) {
+ // Load the URL
+ res, e := http.Get(url)
+ if e != nil {
+ return nil, e
+ }
+ return NewDocumentFromResponse(res)
+}
+
+// NewDocumentFromReader returns a Document from an io.Reader.
+// It returns an error as second value if the reader's data cannot be parsed
+// as html. It does not check if the reader is also an io.Closer, the
+// provided reader is never closed by this call. It is the responsibility
+// of the caller to close it if required.
+func NewDocumentFromReader(r io.Reader) (*Document, error) {
+ root, e := html.Parse(r)
+ if e != nil {
+ return nil, e
+ }
+ return newDocument(root, nil), nil
+}
+
+// NewDocumentFromResponse is another Document constructor that takes an http response as argument.
+// It loads the specified response's document, parses it, and stores the root Document
+// node, ready to be manipulated. The response's body is closed on return.
+//
+// Deprecated: Use goquery.NewDocumentFromReader with the response's body.
+func NewDocumentFromResponse(res *http.Response) (*Document, error) {
+ if res == nil {
+ return nil, errors.New("Response is nil")
+ }
+ defer res.Body.Close()
+ if res.Request == nil {
+ return nil, errors.New("Response.Request is nil")
+ }
+
+ // Parse the HTML into nodes
+ root, e := html.Parse(res.Body)
+ if e != nil {
+ return nil, e
+ }
+
+ // Create and fill the document
+ return newDocument(root, res.Request.URL), nil
+}
+
+// CloneDocument creates a deep-clone of a document.
+func CloneDocument(doc *Document) *Document {
+ return newDocument(cloneNode(doc.rootNode), doc.Url)
+}
+
+// Private constructor, make sure all fields are correctly filled.
+func newDocument(root *html.Node, url *url.URL) *Document {
+ // Create and fill the document
+ d := &Document{nil, url, root}
+ d.Selection = newSingleSelection(root, d)
+ return d
+}
+
+// Selection represents a collection of nodes matching some criteria. The
+// initial Selection can be created by using Document.Find, and then
+// manipulated using the jQuery-like chainable syntax and methods.
+type Selection struct {
+ Nodes []*html.Node
+ document *Document
+ prevSel *Selection
+}
+
+// Helper constructor to create an empty selection
+func newEmptySelection(doc *Document) *Selection {
+ return &Selection{nil, doc, nil}
+}
+
+// Helper constructor to create a selection of only one node
+func newSingleSelection(node *html.Node, doc *Document) *Selection {
+ return &Selection{[]*html.Node{node}, doc, nil}
+}
+
+// Matcher is an interface that defines the methods to match
+// HTML nodes against a compiled selector string. Cascadia's
+// Selector implements this interface.
+type Matcher interface {
+ Match(*html.Node) bool
+ MatchAll(*html.Node) []*html.Node
+ Filter([]*html.Node) []*html.Node
+}
+
+// Single compiles a selector string to a Matcher that stops after the first
+// match is found.
+//
+// By default, Selection.Find and other functions that accept a selector string
+// to select nodes will use all matches corresponding to that selector. By
+// using the Matcher returned by Single, at most the first match will be
+// selected.
+//
+// For example, those two statements are semantically equivalent:
+//
+// sel1 := doc.Find("a").First()
+// sel2 := doc.FindMatcher(goquery.Single("a"))
+//
+// The one using Single is optimized to be potentially much faster on large
+// documents.
+//
+// Only the behaviour of the MatchAll method of the Matcher interface is
+// altered compared to standard Matchers. This means that the single-selection
+// property of the Matcher only applies for Selection methods where the Matcher
+// is used to select nodes, not to filter or check if a node matches the
+// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g.
+// FilterMatcher(Single("div")) will still result in a Selection with multiple
+// "div"s if there were many "div"s in the Selection to begin with).
+func Single(selector string) Matcher {
+ return singleMatcher{compileMatcher(selector)}
+}
+
+// SingleMatcher returns a Matcher that matches the same nodes as m, but that stops
+// after the first match is found.
+//
+// See the documentation of function Single for more details.
+func SingleMatcher(m Matcher) Matcher {
+ if _, ok := m.(singleMatcher); ok {
+ // m is already a singleMatcher
+ return m
+ }
+ return singleMatcher{m}
+}
+
+// compileMatcher compiles the selector string s and returns
+// the corresponding Matcher. If s is an invalid selector string,
+// it returns a Matcher that fails all matches.
+func compileMatcher(s string) Matcher {
+ cs, err := cascadia.Compile(s)
+ if err != nil {
+ return invalidMatcher{}
+ }
+ return cs
+}
+
+type singleMatcher struct {
+ Matcher
+}
+
+func (m singleMatcher) MatchAll(n *html.Node) []*html.Node {
+ // Optimized version - stops finding at the first match (cascadia-compiled
+ // matchers all use this code path).
+ if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok {
+ node := mm.MatchFirst(n)
+ if node == nil {
+ return nil
+ }
+ return []*html.Node{node}
+ }
+
+ // Fallback version, for e.g. test mocks that don't provide the MatchFirst
+ // method.
+ nodes := m.Matcher.MatchAll(n)
+ if len(nodes) > 0 {
+ return nodes[:1:1]
+ }
+ return nil
+}
+
+// invalidMatcher is a Matcher that always fails to match.
+type invalidMatcher struct{}
+
+func (invalidMatcher) Match(n *html.Node) bool { return false }
+func (invalidMatcher) MatchAll(n *html.Node) []*html.Node { return nil }
+func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }
diff --git a/vendor/github.com/PuerkitoBio/goquery/utilities.go b/vendor/github.com/PuerkitoBio/goquery/utilities.go
new file mode 100644
index 00000000..ecd3453f
--- /dev/null
+++ b/vendor/github.com/PuerkitoBio/goquery/utilities.go
@@ -0,0 +1,178 @@
+package goquery
+
+import (
+ "bytes"
+ "io"
+
+ "golang.org/x/net/html"
+)
+
+// used to determine if a set (map[*html.Node]bool) should be used
+// instead of iterating over a slice. The set uses more memory and
+// is slower than slice iteration for small N.
+const minNodesForSet = 1000
+
+var nodeNames = []string{
+ html.ErrorNode: "#error",
+ html.TextNode: "#text",
+ html.DocumentNode: "#document",
+ html.CommentNode: "#comment",
+}
+
+// NodeName returns the node name of the first element in the selection.
+// It tries to behave in a similar way as the DOM's nodeName property
+// (https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName).
+//
+// Go's net/html package defines the following node types, listed with
+// the corresponding returned value from this function:
+//
+// ErrorNode : #error
+// TextNode : #text
+// DocumentNode : #document
+// ElementNode : the element's tag name
+// CommentNode : #comment
+// DoctypeNode : the name of the document type
+//
+func NodeName(s *Selection) string {
+ if s.Length() == 0 {
+ return ""
+ }
+ return nodeName(s.Get(0))
+}
+
+// nodeName returns the node name of the given html node.
+// See NodeName for additional details on behaviour.
+func nodeName(node *html.Node) string {
+ if node == nil {
+ return ""
+ }
+
+ switch node.Type {
+ case html.ElementNode, html.DoctypeNode:
+ return node.Data
+ default:
+ if int(node.Type) < len(nodeNames) {
+ return nodeNames[node.Type]
+ }
+ return ""
+ }
+}
+
+// Render renders the HTML of the first item in the selection and writes it to
+// the writer. It behaves the same as OuterHtml but writes to w instead of
+// returning the string.
+func Render(w io.Writer, s *Selection) error {
+ if s.Length() == 0 {
+ return nil
+ }
+ n := s.Get(0)
+ return html.Render(w, n)
+}
+
+// OuterHtml returns the outer HTML rendering of the first item in
+// the selection - that is, the HTML including the first element's
+// tag and attributes.
+//
+// Unlike Html, this is a function and not a method on the Selection,
+// because this is not a jQuery method (in javascript-land, this is
+// a property provided by the DOM).
+func OuterHtml(s *Selection) (string, error) {
+ var buf bytes.Buffer
+ if err := Render(&buf, s); err != nil {
+ return "", err
+ }
+ return buf.String(), nil
+}
+
+// Loop through all container nodes to search for the target node.
+func sliceContains(container []*html.Node, contained *html.Node) bool {
+ for _, n := range container {
+ if nodeContains(n, contained) {
+ return true
+ }
+ }
+
+ return false
+}
+
+// Checks if the contained node is within the container node.
+func nodeContains(container *html.Node, contained *html.Node) bool {
+ // Check if the parent of the contained node is the container node, traversing
+ // upward until the top is reached, or the container is found.
+ for contained = contained.Parent; contained != nil; contained = contained.Parent {
+ if container == contained {
+ return true
+ }
+ }
+ return false
+}
+
+// Checks if the target node is in the slice of nodes.
+func isInSlice(slice []*html.Node, node *html.Node) bool {
+ return indexInSlice(slice, node) > -1
+}
+
+// Returns the index of the target node in the slice, or -1.
+func indexInSlice(slice []*html.Node, node *html.Node) int {
+ if node != nil {
+ for i, n := range slice {
+ if n == node {
+ return i
+ }
+ }
+ }
+ return -1
+}
+
+// Appends the new nodes to the target slice, making sure no duplicate is added.
+// There is no check to the original state of the target slice, so it may still
+// contain duplicates. The target slice is returned because append() may create
+// a new underlying array. If targetSet is nil, a local set is created with the
+// target if len(target) + len(nodes) is greater than minNodesForSet.
+func appendWithoutDuplicates(target []*html.Node, nodes []*html.Node, targetSet map[*html.Node]bool) []*html.Node {
+ // if there are not that many nodes, don't use the map, faster to just use nested loops
+ // (unless a non-nil targetSet is passed, in which case the caller knows better).
+ if targetSet == nil && len(target)+len(nodes) < minNodesForSet {
+ for _, n := range nodes {
+ if !isInSlice(target, n) {
+ target = append(target, n)
+ }
+ }
+ return target
+ }
+
+ // if a targetSet is passed, then assume it is reliable, otherwise create one
+ // and initialize it with the current target contents.
+ if targetSet == nil {
+ targetSet = make(map[*html.Node]bool, len(target))
+ for _, n := range target {
+ targetSet[n] = true
+ }
+ }
+ for _, n := range nodes {
+ if !targetSet[n] {
+ target = append(target, n)
+ targetSet[n] = true
+ }
+ }
+
+ return target
+}
+
+// Loop through a selection, returning only those nodes that pass the predicate
+// function.
+func grep(sel *Selection, predicate func(i int, s *Selection) bool) (result []*html.Node) {
+ for i, n := range sel.Nodes {
+ if predicate(i, newSingleSelection(n, sel.document)) {
+ result = append(result, n)
+ }
+ }
+ return result
+}
+
+// Creates a new Selection object based on the specified nodes, and keeps the
+// source Selection object on the stack (linked list).
+func pushStack(fromSel *Selection, nodes []*html.Node) *Selection {
+ result := &Selection{nodes, fromSel.document, fromSel}
+ return result
+}
diff --git a/vendor/github.com/andybalholm/cascadia/.travis.yml b/vendor/github.com/andybalholm/cascadia/.travis.yml
new file mode 100644
index 00000000..6f227517
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/.travis.yml
@@ -0,0 +1,14 @@
+language: go
+
+go:
+ - 1.3
+ - 1.4
+
+install:
+ - go get github.com/andybalholm/cascadia
+
+script:
+ - go test -v
+
+notifications:
+ email: false
diff --git a/vendor/github.com/andybalholm/cascadia/LICENSE b/vendor/github.com/andybalholm/cascadia/LICENSE
new file mode 100644
index 00000000..ee5ad35a
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2011 Andy Balholm. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/andybalholm/cascadia/README.md b/vendor/github.com/andybalholm/cascadia/README.md
new file mode 100644
index 00000000..6433cb9c
--- /dev/null
+++ b/vendor/github.com/andybalholm/cascadia/README.md
@@ -0,0 +1,144 @@
+# cascadia
+
+[![](https://travis-ci.org/andybalholm/cascadia.svg)](https://travis-ci.org/andybalholm/cascadia)
+
+The Cascadia package implements CSS selectors for use with the parse trees produced by the html package.
+
+To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package.
+
+[Refer to godoc here](https://godoc.org/github.com/andybalholm/cascadia).
+
+## Example
+
+The following is an example of how you can use Cascadia.
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+ "strings"
+
+ "github.com/andybalholm/cascadia"
+ "golang.org/x/net/html"
+)
+
+var pricingHtml string = `
+
Free
+ $0/mo
+
+
+ Pro
+ $15/mo
+
+
+ Enterprise
+ $29/mo
+
+
+
tag while this function will preserve
+//
as newline.
+func InnerText(node *html.Node) string {
+ var buffer bytes.Buffer
+ var finder func(*html.Node)
+
+ finder = func(n *html.Node) {
+ switch n.Type {
+ case html.TextNode:
+ buffer.WriteString(" " + n.Data + " ")
+
+ case html.ElementNode:
+ if n.Data == "br" {
+ buffer.WriteString(`|\/|`)
+ return
+ }
+
+ if HasAttribute(n, "hidden") {
+ return
+ }
+
+ styleAttr := GetAttribute(n, "style")
+ if rxDisplayNone.MatchString(styleAttr) || rxVisibilityHidden.MatchString(styleAttr) {
+ return
+ }
+ }
+
+ for child := n.FirstChild; child != nil; child = child.NextSibling {
+ finder(child)
+ }
+ }
+
+ finder(node)
+ text := buffer.String()
+ text = strings.Join(strings.Fields(text), " ")
+ text = rxPunctuation.ReplaceAllString(text, "$1 $2")
+ text = rxTempNewline.ReplaceAllString(text, "\n")
+ return text
+}
+
+// OuterHTML returns an HTML serialization of the element and its descendants.
+// The returned HTML value is escaped.
+func OuterHTML(node *html.Node) string {
+ if node == nil {
+ return ""
+ }
+
+ var buffer bytes.Buffer
+ err := html.Render(&buffer, node)
+ if err != nil {
+ return ""
+ }
+
+ return buffer.String()
+}
+
+// InnerHTML returns the HTML content (inner HTML) of an element.
+// The returned HTML value is escaped.
+func InnerHTML(node *html.Node) string {
+ var err error
+ var buffer bytes.Buffer
+
+ if node == nil {
+ return ""
+ }
+
+ for child := node.FirstChild; child != nil; child = child.NextSibling {
+ err = html.Render(&buffer, child)
+ if err != nil {
+ return ""
+ }
+ }
+
+ return strings.TrimSpace(buffer.String())
+}
+
+// DocumentElement returns the Element that is the root element
+// of the document. Since we are working with HTML document,
+// the root will be the <html> element (for HTML documents).
+func DocumentElement(doc *html.Node) *html.Node {
+ if nodes := GetElementsByTagName(doc, "html"); len(nodes) > 0 {
+ return nodes[0]
+ }
+ return nil
+}
+
+// ID returns the value of the id attribute of the specified element.
+func ID(node *html.Node) string {
+ id := GetAttribute(node, "id")
+ id = strings.TrimSpace(id)
+ return id
+}
+
+// ClassName returns the value of the class attribute of
+// the specified element.
+func ClassName(node *html.Node) string {
+ className := GetAttribute(node, "class")
+ className = strings.TrimSpace(className)
+ className = strings.Join(strings.Fields(className), " ")
+ return className
+}
+
+// Children returns an HTMLCollection of the direct child elements of Node.
+func Children(node *html.Node) []*html.Node {
+ var children []*html.Node
+ if node == nil {
+ return nil
+ }
+
+ for child := node.FirstChild; child != nil; child = child.NextSibling {
+ if child.Type == html.ElementNode {
+ children = append(children, child)
+ }
+ }
+
+ return children
+}
+
+// ChildNodes returns list of a node's direct children.
+func ChildNodes(node *html.Node) []*html.Node {
+ var childNodes []*html.Node
+ for child := node.FirstChild; child != nil; child = child.NextSibling {
+ childNodes = append(childNodes, child)
+ }
+ return childNodes
+}
+
+// FirstElementChild returns the object's first child Element,
+// or nil if there are no child elements.
+func FirstElementChild(node *html.Node) *html.Node {
+ for child := node.FirstChild; child != nil; child = child.NextSibling {
+ if child.Type == html.ElementNode {
+ return child
+ }
+ }
+ return nil
+}
+
+// PreviousElementSibling returns the Element immediately prior
+// to the specified one in its parent's children list, or nil if
+// the specified element is the first one in the list.
+func PreviousElementSibling(node *html.Node) *html.Node {
+ for sibling := node.PrevSibling; sibling != nil; sibling = sibling.PrevSibling {
+ if sibling.Type == html.ElementNode {
+ return sibling
+ }
+ }
+ return nil
+}
+
+// NextElementSibling returns the Element immediately following
+// the specified one in its parent's children list, or nil if the
+// specified Element is the last one in the list.
+func NextElementSibling(node *html.Node) *html.Node {
+ for sibling := node.NextSibling; sibling != nil; sibling = sibling.NextSibling {
+ if sibling.Type == html.ElementNode {
+ return sibling
+ }
+ }
+ return nil
+}
+
+// AppendChild adds a node to the end of the list of children of a
+// specified parent node. If the given child is a reference to an
+// existing node in the document, AppendChild() moves it from its
+// current position to the new position.
+func AppendChild(node *html.Node, child *html.Node) {
+ // Make sure node is not void
+ if !IsVoidElement(node) {
+ DetachChild(child)
+ node.AppendChild(child)
+ }
+}
+
+// PrependChild works like AppendChild() except it adds a node to the
+// beginning of the list of children of a specified parent node.
+func PrependChild(node *html.Node, child *html.Node) {
+ // Make sure node is not void
+ if !IsVoidElement(node) {
+ DetachChild(child)
+ if node.FirstChild != nil {
+ node.InsertBefore(child, node.FirstChild)
+ } else {
+ node.AppendChild(child)
+ }
+ }
+}
+
+// ReplaceChild replaces a child node within the given (parent) node.
+// If the new child already exists in the document, ReplaceChild() will move it
+// from its current position to replace the old child. Returns both the new and old child.
+//
+// NOTE(review): original guard was `parent == nil && !IsVoidElement(parent)`, which dereferenced nil; fixed to `||`.
+func ReplaceChild(parent *html.Node, newChild *html.Node, oldChild *html.Node) (*html.Node, *html.Node) {
+	// Make sure parent is specified and not void
+	if parent == nil || IsVoidElement(parent) {
+		return newChild, oldChild
+	}
+
+	// Make sure the specified parent IS the parent of the old child
+	if oldChild.Parent != parent {
+		return newChild, oldChild
+	}
+
+	// Detach the new child
+	DetachChild(newChild)
+	parent.InsertBefore(newChild, oldChild)
+	parent.RemoveChild(oldChild)
+	return newChild, oldChild
+}
+
+// IncludeNode determines if node is included inside nodeList.
+func IncludeNode(nodeList []*html.Node, node *html.Node) bool {
+ for i := 0; i < len(nodeList); i++ {
+ if nodeList[i] == node {
+ return true
+ }
+ }
+ return false
+}
+
+// Clone returns a clone of the node and (if specified) its children.
+// However, it will be detached from the original's parents and siblings.
+func Clone(src *html.Node, deep bool) *html.Node {
+ clone := &html.Node{
+ Type: src.Type,
+ DataAtom: src.DataAtom,
+ Data: src.Data,
+ Attr: append([]html.Attribute{}, src.Attr...),
+ }
+
+ if deep {
+ for child := src.FirstChild; child != nil; child = child.NextSibling {
+ clone.AppendChild(Clone(child, deep))
+ }
+ }
+
+ return clone
+}
+
+// GetAllNodesWithTag is wrapper for GetElementsByTagName()
+// which allow to get several tags at once.
+func GetAllNodesWithTag(node *html.Node, tagNames ...string) []*html.Node {
+ var result []*html.Node
+ for i := 0; i < len(tagNames); i++ {
+ result = append(result, GetElementsByTagName(node, tagNames[i])...)
+ }
+ return result
+}
+
+// ForEachNode iterates over a NodeList and runs fn on each node.
+func ForEachNode(nodeList []*html.Node, fn func(*html.Node, int)) {
+ for i := 0; i < len(nodeList); i++ {
+ fn(nodeList[i], i)
+ }
+}
+
+// RemoveNodes iterates over a NodeList, calls `filterFn` for each node
+// and removes node if function returned `true`. If function is not
+// passed, removes all the nodes in node list.
+func RemoveNodes(nodeList []*html.Node, filterFn func(*html.Node) bool) {
+ for i := len(nodeList) - 1; i >= 0; i-- {
+ node := nodeList[i]
+ parentNode := node.Parent
+ if parentNode != nil && (filterFn == nil || filterFn(node)) {
+ parentNode.RemoveChild(node)
+ }
+ }
+}
+
+// SetTextContent sets the text content of the specified node.
+func SetTextContent(node *html.Node, text string) {
+ if IsVoidElement(node) {
+ return
+ }
+
+ child := node.FirstChild
+ for child != nil {
+ nextSibling := child.NextSibling
+ node.RemoveChild(child)
+ child = nextSibling
+ }
+
+ node.AppendChild(&html.Node{
+ Type: html.TextNode,
+ Data: text,
+ })
+}
+
+// SetInnerHTML sets inner HTML of the specified node.
+func SetInnerHTML(node *html.Node, rawHTML string) {
+ // Parse raw HTML
+ parsedHTML, err := html.Parse(strings.NewReader(rawHTML))
+ if err != nil || parsedHTML == nil {
+ return
+ }
+
+ // Remove node's current children
+ child := node.FirstChild
+ for child != nil {
+ nextSibling := child.NextSibling
+ node.RemoveChild(child)
+ child = nextSibling
+ }
+
+ // Put content of parsed HTML to the node
+ if body := QuerySelector(parsedHTML, "body"); body != nil {
+ bodyChild := body.FirstChild
+ for bodyChild != nil {
+ nextSibling := bodyChild.NextSibling
+ AppendChild(node, bodyChild)
+ bodyChild = nextSibling
+ }
+ }
+}
+
+// IsVoidElement check whether a node can have any contents or not.
+// Return true if element is void (can't have any children).
+func IsVoidElement(n *html.Node) bool {
+ // If it's not element, it's void
+ if n.Type != html.ElementNode {
+ return true
+ }
+
+ // Check tag name
+ switch n.Data {
+ case "area", "base", "br", "col", "embed", "hr",
+ "img", "input", "keygen", "link", "meta",
+ "param", "source", "track", "wbr":
+ return true
+ default:
+ return false
+ }
+}
+
+func DetachChild(child *html.Node) {
+ if child.Parent != nil || child.PrevSibling != nil || child.NextSibling != nil {
+ if child.Parent != nil {
+ if child.Parent.FirstChild == child {
+ child.Parent.FirstChild = child.NextSibling
+ }
+
+ if child.Parent.LastChild == child {
+ child.Parent.LastChild = child.PrevSibling
+ }
+ }
+
+ if child.PrevSibling != nil {
+ child.PrevSibling.NextSibling = child.NextSibling
+ }
+
+ if child.NextSibling != nil {
+ child.NextSibling.PrevSibling = child.PrevSibling
+ }
+
+ child.Parent = nil
+ child.PrevSibling = nil
+ child.NextSibling = nil
+ }
+}
diff --git a/vendor/github.com/go-shiori/dom/parser.go b/vendor/github.com/go-shiori/dom/parser.go
new file mode 100644
index 00000000..20a0f0b9
--- /dev/null
+++ b/vendor/github.com/go-shiori/dom/parser.go
@@ -0,0 +1,61 @@
+package dom
+
+import (
+ "bytes"
+ "io"
+ "io/ioutil"
+
+ "github.com/gogs/chardet"
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/charset"
+ xunicode "golang.org/x/text/encoding/unicode"
+ "golang.org/x/text/runes"
+ "golang.org/x/text/transform"
+ "golang.org/x/text/unicode/norm"
+)
+
+// FastParse parses html.Node from the specified reader without caring about
+// text encoding. It always assume that the input uses UTF-8 encoding.
+func FastParse(r io.Reader) (*html.Node, error) {
+ return html.Parse(r)
+}
+
+// Parse parses html.Node from the specified reader while converting the character
+// encoding into UTF-8. This function is useful to correctly parse web pages that
+// uses custom text encoding, e.g. web pages from Asian websites. However, since it
+// has to detect charset before parsing, this function is quite slow and expensive
+// so if you sure the reader uses valid UTF-8, just use FastParse.
+func Parse(r io.Reader) (*html.Node, error) {
+ // Split the reader using tee
+ content, err := ioutil.ReadAll(r)
+ if err != nil {
+ return nil, err
+ }
+
+ // Detect page encoding
+ res, err := chardet.NewHtmlDetector().DetectBest(content)
+ if err != nil {
+ return nil, err
+ }
+
+ pageEncoding, _ := charset.Lookup(res.Charset)
+ if pageEncoding == nil {
+ pageEncoding = xunicode.UTF8
+ }
+
+ // Parse HTML using the page encoding
+ r = bytes.NewReader(content)
+ r = transform.NewReader(r, pageEncoding.NewDecoder())
+ r = normalizeTextEncoding(r)
+ return html.Parse(r)
+}
+
+// normalizeTextEncoding convert text encoding from NFD to NFC.
+// It also remove soft hyphen since apparently it's useless in web.
+// See: https://web.archive.org/web/19990117011731/http://www.hut.fi/~jkorpela/shy.html
+func normalizeTextEncoding(r io.Reader) io.Reader {
+ fnSoftHyphen := func(r rune) bool { return r == '\u00AD' }
+ softHyphenSet := runes.Predicate(fnSoftHyphen)
+ transformer := transform.Chain(norm.NFD, runes.Remove(softHyphenSet), norm.NFC)
+ return transform.NewReader(r, transformer)
+}
diff --git a/vendor/github.com/go-shiori/go-readability/.gitattributes b/vendor/github.com/go-shiori/go-readability/.gitattributes
new file mode 100644
index 00000000..6c04321d
--- /dev/null
+++ b/vendor/github.com/go-shiori/go-readability/.gitattributes
@@ -0,0 +1 @@
+test-pages/* linguist-vendored
\ No newline at end of file
diff --git a/vendor/github.com/go-shiori/go-readability/.gitignore b/vendor/github.com/go-shiori/go-readability/.gitignore
new file mode 100644
index 00000000..186bf5a8
--- /dev/null
+++ b/vendor/github.com/go-shiori/go-readability/.gitignore
@@ -0,0 +1,5 @@
+.vscode/
+js/*
+
+# Hidden script for development
+scripts/internal/
\ No newline at end of file
diff --git a/vendor/github.com/go-shiori/go-readability/.travis.yml b/vendor/github.com/go-shiori/go-readability/.travis.yml
new file mode 100644
index 00000000..647a2880
--- /dev/null
+++ b/vendor/github.com/go-shiori/go-readability/.travis.yml
@@ -0,0 +1,16 @@
+language: go
+sudo: true
+matrix:
+ include:
+ - go: 1.x
+ env: LATEST=true
+ - go: "1.13"
+
+install:
+ - go get github.com/go-shiori/go-readability
+
+script:
+ - go get -t -v ./...
+ - diff -u <(echo -n) <(gofmt -d .)
+ - go vet $(go list ./... | grep -v /vendor/)
+ - go test -v -race ./...
\ No newline at end of file
diff --git a/vendor/github.com/go-shiori/go-readability/LICENSE b/vendor/github.com/go-shiori/go-readability/LICENSE
new file mode 100644
index 00000000..e4283b76
--- /dev/null
+++ b/vendor/github.com/go-shiori/go-readability/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Radhi Fadlillah
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/go-shiori/go-readability/README.md b/vendor/github.com/go-shiori/go-readability/README.md
new file mode 100644
index 00000000..7729c517
--- /dev/null
+++ b/vendor/github.com/go-shiori/go-readability/README.md
@@ -0,0 +1,169 @@
+# Go-Readability [![Go Reference][go-ref-badge]][go-ref] [![PayPal][paypal-badge]][paypal] [![Ko-fi][kofi-badge]][kofi]
+
+Go-Readability is a Go package that find the main readable content and the metadata from a HTML page. It works by removing clutter like buttons, ads, background images, script, etc.
+
+This package is based from [Readability.js] by [Mozilla] and written line by line to make sure it looks and works as similar as possible. This way, hopefully all web page that can be parsed by Readability.js are parse-able by go-readability as well.
+
+## Table of Contents
+
+- [Table of Contents](#table-of-contents)
+- [Status](#status)
+- [Installation](#installation)
+- [Example](#example)
+- [Command Line Usage](#command-line-usage)
+- [Licenses](#licenses)
+
+## Status
+
+This package is stable enough for use and up to date with Readability.js [v0.4.4][last-version] (commit [`b359811`][last-commit]).
+
+## Installation
+
+To install this package, just run `go get` :
+
+```
+go get -u -v github.com/go-shiori/go-readability
+```
+
+## Example
+
+To get the readable content from an URL, you can use `readability.FromURL`. It will fetch the web page from specified url, check if it's readable, then parses the response to find the readable content :
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+ "os"
+ "time"
+
+ readability "github.com/go-shiori/go-readability"
+)
+
+var (
+ urls = []string{
+ // this one is article, so it's parse-able
+ "https://www.nytimes.com/2019/02/20/climate/climate-national-security-threat.html",
+ // while this one is not an article, so readability will fail to parse.
+ "https://www.nytimes.com/",
+ }
+)
+
+func main() {
+ for i, url := range urls {
+ article, err := readability.FromURL(url, 30*time.Second)
+ if err != nil {
+ log.Fatalf("failed to parse %s, %v\n", url, err)
+ }
+
+ dstTxtFile, _ := os.Create(fmt.Sprintf("text-%02d.txt", i+1))
+ defer dstTxtFile.Close()
+ dstTxtFile.WriteString(article.TextContent)
+
+ dstHTMLFile, _ := os.Create(fmt.Sprintf("html-%02d.html", i+1))
+ defer dstHTMLFile.Close()
+ dstHTMLFile.WriteString(article.Content)
+
+ fmt.Printf("URL : %s\n", url)
+ fmt.Printf("Title : %s\n", article.Title)
+ fmt.Printf("Author : %s\n", article.Byline)
+ fmt.Printf("Length : %d\n", article.Length)
+ fmt.Printf("Excerpt : %s\n", article.Excerpt)
+ fmt.Printf("SiteName: %s\n", article.SiteName)
+ fmt.Printf("Image : %s\n", article.Image)
+ fmt.Printf("Favicon : %s\n", article.Favicon)
+ fmt.Printf("Text content saved to \"text-%02d.txt\"\n", i+1)
+ fmt.Printf("HTML content saved to \"html-%02d.html\"\n", i+1)
+ fmt.Println()
+ }
+}
+```
+
+However, sometimes you want to parse an URL no matter if it's an article or not. For example is when you only want to get metadata of the page. To do that, you have to download the page manually using `http.Get`, then parse it using `readability.FromReader` :
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+ "net/http"
+
+ readability "github.com/go-shiori/go-readability"
+)
+
+var (
+ urls = []string{
+ // Both will be parse-able now
+ "https://www.nytimes.com/2019/02/20/climate/climate-national-security-threat.html",
+ // But this one will not have any content
+ "https://www.nytimes.com/",
+ }
+)
+
+func main() {
+ for _, url := range urls {
+ resp, err := http.Get(url)
+ if err != nil {
+ log.Fatalf("failed to download %s: %v\n", url, err)
+ }
+ defer resp.Body.Close()
+
+ article, err := readability.FromReader(resp.Body, url)
+ if err != nil {
+ log.Fatalf("failed to parse %s: %v\n", url, err)
+ }
+
+ fmt.Printf("URL : %s\n", url)
+ fmt.Printf("Title : %s\n", article.Title)
+ fmt.Printf("Author : %s\n", article.Byline)
+ fmt.Printf("Length : %d\n", article.Length)
+ fmt.Printf("Excerpt : %s\n", article.Excerpt)
+ fmt.Printf("SiteName: %s\n", article.SiteName)
+ fmt.Printf("Image : %s\n", article.Image)
+ fmt.Printf("Favicon : %s\n", article.Favicon)
+ fmt.Println()
+ }
+}
+```
+
+## Command Line Usage
+
+You can also use `go-readability` as command line app. To do that, first install the CLI :
+
+```
+go get -u -v github.com/go-shiori/go-readability/cmd/...
+```
+
+Now you can use it by running `go-readability` in your terminal :
+
+```
+$ go-readability -h
+
+go-readability is parser to fetch the readable content of a web page.
+The source can be an url or existing file in your storage.
+
+Usage:
+ go-readability [flags] source
+
+Flags:
+ -h, --help help for go-readability
+ -m, --metadata only print the page's metadata
+```
+
+## Licenses
+
+Go-Readability is distributed under [MIT license][mit], which means you can use and modify it however you want. However, if you make an enhancement for it, if possible, please send a pull request. If you like this project, please consider donating to me either via [PayPal][paypal] or [Ko-Fi][kofi].
+
+[go-ref]: https://pkg.go.dev/github.com/go-shiori/go-readability
+[go-ref-badge]: https://img.shields.io/static/v1?label=&message=Reference&color=007d9c&logo=go&logoColor=white
+[paypal]: https://www.paypal.me/RadhiFadlillah
+[paypal-badge]: https://img.shields.io/static/v1?label=&message=PayPal&color=00457C&logo=paypal&logoColor=white
+[kofi]: https://ko-fi.com/radhifadlillah
+[kofi-badge]: https://img.shields.io/static/v1?label=&message=Ko-fi&color=F16061&logo=ko-fi&logoColor=white
+[readability.js]: https://github.com/mozilla/readability
+[mozilla]: https://github.com/mozilla
+[last-version]: https://github.com/mozilla/readability/tree/0.4.4
+[last-commit]: https://github.com/mozilla/readability/commit/b359811927a4bb2323eba085be004978fb18a926
+[mit]: https://choosealicense.com/licenses/mit/
diff --git a/vendor/github.com/go-shiori/go-readability/parser-check.go b/vendor/github.com/go-shiori/go-readability/parser-check.go
new file mode 100644
index 00000000..e3f71b1c
--- /dev/null
+++ b/vendor/github.com/go-shiori/go-readability/parser-check.go
@@ -0,0 +1,79 @@
+package readability
+
+import (
+ "io"
+ "math"
+ "strings"
+
+ "github.com/go-shiori/dom"
+ "golang.org/x/net/html"
+)
+
+// Check checks whether the input is readable without parsing the whole thing.
+func (ps *Parser) Check(input io.Reader) bool {
+ // Parse input
+ doc, err := dom.Parse(input)
+ if err != nil {
+ return false
+ }
+
+ return ps.CheckDocument(doc)
+}
+
+// CheckDocument checks whether the document is readable without parsing the whole thing.
+func (ps *Parser) CheckDocument(doc *html.Node) bool {
+ // Get
and
nodes. + nodes := dom.QuerySelectorAll(doc, "p, pre, article") + + // Also getnodes which have
node(s) and append + // them into the `nodes` variable. + // Some articles' DOM structures might look like : + // + //+ // Sentences+ // + // So we need to make sure only fetch the div once. + // To do so, we will use map as dictionary. + tracker := make(map[*html.Node]struct{}) + for _, br := range dom.QuerySelectorAll(doc, "div > br") { + if br.Parent == nil { + continue + } + + if _, exist := tracker[br.Parent]; !exist { + tracker[br.Parent] = struct{}{} + nodes = append(nodes, br.Parent) + } + } + + // This is a little cheeky, we use the accumulator 'score' to decide what + // to return from this callback. + score := float64(0) + return ps.someNode(nodes, func(node *html.Node) bool { + if !ps.isProbablyVisible(node) { + return false + } + + matchString := dom.ClassName(node) + " " + dom.ID(node) + if rxUnlikelyCandidates.MatchString(matchString) && + !rxOkMaybeItsACandidate.MatchString(matchString) { + return false + } + + if dom.TagName(node) == "p" && ps.hasAncestorTag(node, "li", -1, nil) { + return false + } + + nodeText := strings.TrimSpace(dom.TextContent(node)) + nodeTextLength := len(nodeText) + if nodeTextLength < 140 { + return false + } + + score += math.Sqrt(float64(nodeTextLength - 140)) + return score > 20 + }) +} diff --git a/vendor/github.com/go-shiori/go-readability/parser-parse.go b/vendor/github.com/go-shiori/go-readability/parser-parse.go new file mode 100644 index 00000000..235ce62b --- /dev/null +++ b/vendor/github.com/go-shiori/go-readability/parser-parse.go @@ -0,0 +1,129 @@ +package readability + +import ( + "fmt" + "io" + nurl "net/url" + "strings" + + "github.com/go-shiori/dom" + "golang.org/x/net/html" +) + +// Parse parses a reader and find the main readable content. 
+func (ps *Parser) Parse(input io.Reader, pageURL *nurl.URL) (Article, error) { + // Parse input + doc, err := dom.Parse(input) + if err != nil { + return Article{}, fmt.Errorf("failed to parse input: %v", err) + } + + return ps.ParseDocument(doc, pageURL) +} + +// ParseDocument parses the specified document and find the main readable content. +func (ps *Parser) ParseDocument(doc *html.Node, pageURL *nurl.URL) (Article, error) { + // Clone document to make sure the original kept untouched + ps.doc = dom.Clone(doc, true) + + // Reset parser data + ps.articleTitle = "" + ps.articleByline = "" + ps.articleDir = "" + ps.articleSiteName = "" + ps.documentURI = pageURL + ps.attempts = []parseAttempt{} + ps.flags = flags{ + stripUnlikelys: true, + useWeightClasses: true, + cleanConditionally: true, + } + + // Avoid parsing too large documents, as per configuration option + if ps.MaxElemsToParse > 0 { + numTags := len(dom.GetElementsByTagName(ps.doc, "*")) + if numTags > ps.MaxElemsToParse { + return Article{}, fmt.Errorf("documents too large: %d elements", numTags) + } + } + + // Unwrap image from noscript + ps.unwrapNoscriptImages(ps.doc) + + // Extract JSON-LD metadata before removing scripts + var jsonLd map[string]string + if !ps.DisableJSONLD { + jsonLd, _ = ps.getJSONLD() + } + + // Remove script tags from the document. + ps.removeScripts(ps.doc) + + // Prepares the HTML document + ps.prepDocument() + + // Fetch metadata + metadata := ps.getArticleMetadata(jsonLd) + ps.articleTitle = metadata["title"] + + // Try to grab article content + finalHTMLContent := "" + finalTextContent := "" + articleContent := ps.grabArticle() + var readableNode *html.Node + + if articleContent != nil { + ps.postProcessContent(articleContent) + + // If we haven't found an excerpt in the article's metadata, + // use the article's first paragraph as the excerpt. This is used + // for displaying a preview of the article's content. 
+ if metadata["excerpt"] == "" { + paragraphs := dom.GetElementsByTagName(articleContent, "p") + if len(paragraphs) > 0 { + metadata["excerpt"] = strings.TrimSpace(dom.TextContent(paragraphs[0])) + } + } + + readableNode = dom.FirstElementChild(articleContent) + finalHTMLContent = dom.InnerHTML(articleContent) + finalTextContent = dom.TextContent(articleContent) + finalTextContent = strings.TrimSpace(finalTextContent) + } + + finalByline := metadata["byline"] + if finalByline == "" { + finalByline = ps.articleByline + } + + // Excerpt is an supposed to be short and concise, + // so it shouldn't have any new line + excerpt := strings.TrimSpace(metadata["excerpt"]) + excerpt = strings.Join(strings.Fields(excerpt), " ") + + // go-readability special: + // Internet is dangerous and weird, and sometimes we will find + // metadata isn't encoded using a valid Utf-8, so here we check it. + var replacementTitle string + if pageURL != nil { + replacementTitle = pageURL.String() + } + + validTitle := strings.ToValidUTF8(ps.articleTitle, replacementTitle) + validByline := strings.ToValidUTF8(finalByline, "") + validExcerpt := strings.ToValidUTF8(excerpt, "") + + return Article{ + Title: validTitle, + Byline: validByline, + Node: readableNode, + Content: finalHTMLContent, + TextContent: finalTextContent, + Length: charCount(finalTextContent), + Excerpt: validExcerpt, + SiteName: metadata["siteName"], + Image: metadata["image"], + Favicon: metadata["favicon"], + Language: ps.articleLang, + }, nil +} diff --git a/vendor/github.com/go-shiori/go-readability/parser.go b/vendor/github.com/go-shiori/go-readability/parser.go new file mode 100644 index 00000000..b4c7c83f --- /dev/null +++ b/vendor/github.com/go-shiori/go-readability/parser.go @@ -0,0 +1,2300 @@ +package readability + +import ( + "encoding/json" + "fmt" + shtml "html" + "log" + "math" + nurl "net/url" + "regexp" + "sort" + "strconv" + "strings" + + "github.com/go-shiori/dom" + "golang.org/x/net/html" +) + +// All of the 
regular expressions in use within readability. +// Defined up here so we don't instantiate them repeatedly in loops *. +var ( + rxUnlikelyCandidates = regexp.MustCompile(`(?i)-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote`) + rxOkMaybeItsACandidate = regexp.MustCompile(`(?i)and|article|body|column|content|main|shadow`) + rxPositive = regexp.MustCompile(`(?i)article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story`) + rxNegative = regexp.MustCompile(`(?i)-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget`) + rxByline = regexp.MustCompile(`(?i)byline|author|dateline|writtenby|p-author`) + rxNormalize = regexp.MustCompile(`(?i)\s{2,}`) + rxVideosx = regexp.MustCompile(`(?i)//(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)`) + rxTokenize = regexp.MustCompile(`(?i)\W+`) + rxWhitespace = regexp.MustCompile(`(?i)^\s*$`) + rxHasContent = regexp.MustCompile(`(?i)\S$`) + rxHashURL = regexp.MustCompile(`(?i)^#.+`) + rxPropertyPattern = regexp.MustCompile(`(?i)\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|site_name|image\S*)\s*`) + rxNamePattern = regexp.MustCompile(`(?i)^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name|image)\s*$`) + rxTitleSeparator = regexp.MustCompile(`(?i) [\|\-\\/>»] `) + rxTitleHierarchySep = regexp.MustCompile(`(?i) [\\/>»] `) + rxTitleRemoveFinalPart = regexp.MustCompile(`(?i)(.*)[\|\-\\/>»] .*`) + rxTitleRemove1stPart = regexp.MustCompile(`(?i)[^\|\-\\/>»]*[\|\-\\/>»](.*)`) + rxTitleAnySeparator = 
regexp.MustCompile(`(?i)[\|\-\\/>»]+`) + rxDisplayNone = regexp.MustCompile(`(?i)display\s*:\s*none`) + rxSentencePeriod = regexp.MustCompile(`(?i)\.( |$)`) + rxShareElements = regexp.MustCompile(`(?i)(\b|_)(share|sharedaddy)(\b|_)`) + rxFaviconSize = regexp.MustCompile(`(?i)(\d+)x(\d+)`) + rxLazyImageSrcset = regexp.MustCompile(`(?i)\.(jpg|jpeg|png|webp)\s+\d`) + rxLazyImageSrc = regexp.MustCompile(`(?i)^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$`) + rxImgExtensions = regexp.MustCompile(`(?i)\.(jpg|jpeg|png|webp)`) + rxSrcsetURL = regexp.MustCompile(`(?i)(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))`) + rxB64DataURL = regexp.MustCompile(`(?i)^data:\s*([^\s;,]+)\s*;\s*base64\s*,`) + rxJsonLdArticleTypes = regexp.MustCompile(`(?i)^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$`) + rxCDATA = regexp.MustCompile(`^\s*\s*$`) + rxSchemaOrg = regexp.MustCompile(`(?i)^https?\:\/\/schema\.org$`) +) + +// Constants that used by readability. 
+var ( + unlikelyRoles = sliceToMap("menu", "menubar", "complementary", "navigation", "alert", "alertdialog", "dialog") + divToPElems = sliceToMap("blockquote", "dl", "div", "img", "ol", "p", "pre", "table", "ul", "select") + alterToDivExceptions = []string{"div", "article", "section", "p"} + presentationalAttributes = []string{"align", "background", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "hspace", "rules", "style", "valign", "vspace"} + deprecatedSizeAttributeElems = []string{"table", "th", "td", "hr", "pre"} + phrasingElems = []string{ + "abbr", "audio", "b", "bdo", "br", "button", "cite", "code", "data", + "datalist", "dfn", "em", "embed", "i", "img", "input", "kbd", "label", + "mark", "math", "meter", "noscript", "object", "output", "progress", "q", + "ruby", "samp", "script", "select", "small", "span", "strong", "sub", + "sup", "textarea", "time", "var", "wbr"} +) + +// flags is flags that used by parser. +type flags struct { + stripUnlikelys bool + useWeightClasses bool + cleanConditionally bool +} + +// parseAttempt is container for the result of previous parse attempts. +type parseAttempt struct { + articleContent *html.Node + textLength int +} + +// Article is the final readable content. +type Article struct { + Title string + Byline string + Node *html.Node + Content string + TextContent string + Length int + Excerpt string + SiteName string + Image string + Favicon string + Language string +} + +// Parser is the parser that parses the page to get the readable content. +type Parser struct { + // MaxElemsToParse is the max number of nodes supported by this + // parser. Default: 0 (no limit) + MaxElemsToParse int + // NTopCandidates is the number of top candidates to consider when + // analysing how tight the competition is among candidates. 
+ NTopCandidates int + // CharThresholds is the default number of chars an article must + // have in order to return a result + CharThresholds int + // ClassesToPreserve are the classes that readability sets itself. + ClassesToPreserve []string + // KeepClasses specify whether the classes should be stripped or not. + KeepClasses bool + // TagsToScore is element tags to score by default. + TagsToScore []string + // Debug determines if the log should be printed or not. Default: false. + Debug bool + // DisableJSONLD determines if metadata in JSON+LD will be extracted + // or not. Default: false. + DisableJSONLD bool + // AllowedVideoRegex is a regular expression that matches video URLs that should be + // allowed to be included in the article content. If undefined, it will use default filter. + AllowedVideoRegex *regexp.Regexp + + doc *html.Node + documentURI *nurl.URL + articleTitle string + articleByline string + articleDir string + articleSiteName string + articleLang string + attempts []parseAttempt + flags flags +} + +// NewParser returns new Parser which set up with default value. +func NewParser() Parser { + return Parser{ + MaxElemsToParse: 0, + NTopCandidates: 5, + CharThresholds: 500, + ClassesToPreserve: []string{"page"}, + KeepClasses: false, + TagsToScore: []string{"section", "h2", "h3", "h4", "h5", "h6", "p", "td", "pre"}, + Debug: false, + } +} + +// postProcessContent runs any post-process modifications to article +// content as necessary. +func (ps *Parser) postProcessContent(articleContent *html.Node) { + // Readability cannot open relative uris so we convert them to absolute uris. + ps.fixRelativeURIs(articleContent) + + ps.simplifyNestedElements(articleContent) + + // Remove classes. + if !ps.KeepClasses { + ps.cleanClasses(articleContent) + } + + // Remove readability attributes. + ps.clearReadabilityAttr(articleContent) +} + +// removeNodes iterates over a NodeList, calls `filterFn` for each node +// and removes node if function returned `true`. 
If function is not +// passed, removes all the nodes in node list. +func (ps *Parser) removeNodes(nodeList []*html.Node, filterFn func(*html.Node) bool) { + for i := len(nodeList) - 1; i >= 0; i-- { + node := nodeList[i] + parentNode := node.Parent + if parentNode != nil && (filterFn == nil || filterFn(node)) { + parentNode.RemoveChild(node) + } + } +} + +// replaceNodeTags iterates over a NodeList, and calls setNodeTag for +// each node. +func (ps *Parser) replaceNodeTags(nodeList []*html.Node, newTagName string) { + for i := len(nodeList) - 1; i >= 0; i-- { + node := nodeList[i] + ps.setNodeTag(node, newTagName) + } +} + +// forEachNode iterates over a NodeList and runs fn on each node. +func (ps *Parser) forEachNode(nodeList []*html.Node, fn func(*html.Node, int)) { + for i := 0; i < len(nodeList); i++ { + fn(nodeList[i], i) + } +} + +// someNode iterates over a NodeList, return true if any of the +// provided iterate function calls returns true, false otherwise. +func (ps *Parser) someNode(nodeList []*html.Node, fn func(*html.Node) bool) bool { + for i := 0; i < len(nodeList); i++ { + if fn(nodeList[i]) { + return true + } + } + return false +} + +// everyNode iterates over a NodeList, return true if all of the +// provided iterate function calls returns true, false otherwise. +func (ps *Parser) everyNode(nodeList []*html.Node, fn func(*html.Node) bool) bool { + for i := 0; i < len(nodeList); i++ { + if !fn(nodeList[i]) { + return false + } + } + return true +} + +// concatNodeLists concats all nodelists passed as arguments. +func (ps *Parser) concatNodeLists(nodeLists ...[]*html.Node) []*html.Node { + var result []*html.Node + for i := 0; i < len(nodeLists); i++ { + result = append(result, nodeLists[i]...) + } + return result +} + +// getAllNodesWithTag returns all nodes that has tag inside tagNames. 
+func (ps *Parser) getAllNodesWithTag(node *html.Node, tagNames ...string) []*html.Node { + var result []*html.Node + for i := 0; i < len(tagNames); i++ { + result = append(result, dom.GetElementsByTagName(node, tagNames[i])...) + } + return result +} + +// cleanClasses removes the class="" attribute from every element in the +// given subtree, except those that match CLASSES_TO_PRESERVE and the +// classesToPreserve array from the options object. +func (ps *Parser) cleanClasses(node *html.Node) { + nodeClassName := dom.ClassName(node) + preservedClassName := []string{} + for _, class := range strings.Fields(nodeClassName) { + if indexOf(ps.ClassesToPreserve, class) != -1 { + preservedClassName = append(preservedClassName, class) + } + } + + if len(preservedClassName) > 0 { + dom.SetAttribute(node, "class", strings.Join(preservedClassName, " ")) + } else { + dom.RemoveAttribute(node, "class") + } + + for child := dom.FirstElementChild(node); child != nil; child = dom.NextElementSibling(child) { + ps.cleanClasses(child) + } +} + +// fixRelativeURIs converts each and uri in the given element +// to an absolute URI, ignoring #ref URIs. +func (ps *Parser) fixRelativeURIs(articleContent *html.Node) { + links := ps.getAllNodesWithTag(articleContent, "a") + ps.forEachNode(links, func(link *html.Node, _ int) { + href := dom.GetAttribute(link, "href") + if href == "" { + return + } + + // Remove links with javascript: URIs, since they won't + // work after scripts have been removed from the page. 
+ if strings.HasPrefix(href, "javascript:") { + linkChilds := dom.ChildNodes(link) + + if len(linkChilds) == 1 && linkChilds[0].Type == html.TextNode { + // If the link only contains simple text content, + // it can be converted to a text node + text := dom.CreateTextNode(dom.TextContent(link)) + dom.ReplaceChild(link.Parent, text, link) + } else { + // If the link has multiple children, they should + // all be preserved + container := dom.CreateElement("span") + for link.FirstChild != nil { + dom.AppendChild(container, link.FirstChild) + } + dom.ReplaceChild(link.Parent, container, link) + } + } else { + newHref := toAbsoluteURI(href, ps.documentURI) + if newHref == "" { + dom.RemoveAttribute(link, "href") + } else { + dom.SetAttribute(link, "href", newHref) + } + } + }) + + medias := ps.getAllNodesWithTag(articleContent, "img", "picture", "figure", "video", "audio", "source") + ps.forEachNode(medias, func(media *html.Node, _ int) { + src := dom.GetAttribute(media, "src") + poster := dom.GetAttribute(media, "poster") + srcset := dom.GetAttribute(media, "srcset") + + if src != "" { + newSrc := toAbsoluteURI(src, ps.documentURI) + dom.SetAttribute(media, "src", newSrc) + } + + if poster != "" { + newPoster := toAbsoluteURI(poster, ps.documentURI) + dom.SetAttribute(media, "poster", newPoster) + } + + if srcset != "" { + newSrcset := rxSrcsetURL.ReplaceAllStringFunc(srcset, func(s string) string { + p := rxSrcsetURL.FindStringSubmatch(s) + return toAbsoluteURI(p[1], ps.documentURI) + p[2] + p[3] + }) + + dom.SetAttribute(media, "srcset", newSrcset) + } + }) +} + +func (ps *Parser) simplifyNestedElements(articleContent *html.Node) { + node := articleContent + + for node != nil { + nodeID := dom.ID(node) + nodeTagName := dom.TagName(node) + + if node.Parent != nil && (nodeTagName == "div" || nodeTagName == "section") && + !strings.HasPrefix(nodeID, "readability") { + if ps.isElementWithoutContent(node) { + node = ps.removeAndGetNext(node) + continue + } + + if 
ps.hasSingleTagInsideElement(node, "div") || ps.hasSingleTagInsideElement(node, "section") { + child := dom.Children(node)[0] + for _, attr := range node.Attr { + dom.SetAttribute(child, attr.Key, attr.Val) + } + + dom.ReplaceChild(node.Parent, child, node) + node = child + continue + } + } + + node = ps.getNextNode(node, false) + } +} + +// getArticleTitle attempts to get the article title. +func (ps *Parser) getArticleTitle() string { + doc := ps.doc + curTitle := "" + origTitle := "" + titleHadHierarchicalSeparators := false + + // If they had an element with tag "title" in their HTML + if nodes := dom.GetElementsByTagName(doc, "title"); len(nodes) > 0 { + origTitle = ps.getInnerText(nodes[0], true) + curTitle = origTitle + } + + // If there's a separator in the title, first remove the final part + if rxTitleSeparator.MatchString(curTitle) { + titleHadHierarchicalSeparators = rxTitleHierarchySep.MatchString(curTitle) + curTitle = rxTitleRemoveFinalPart.ReplaceAllString(origTitle, "$1") + + // If the resulting title is too short (3 words or fewer), remove + // the first part instead: + if wordCount(curTitle) < 3 { + curTitle = rxTitleRemove1stPart.ReplaceAllString(origTitle, "$1") + } + } else if strings.Contains(curTitle, ": ") { + // Check if we have an heading containing this exact string, so + // we could assume it's the full title. + headings := ps.concatNodeLists( + dom.GetElementsByTagName(doc, "h1"), + dom.GetElementsByTagName(doc, "h2"), + ) + + trimmedTitle := strings.TrimSpace(curTitle) + match := ps.someNode(headings, func(heading *html.Node) bool { + return strings.TrimSpace(dom.TextContent(heading)) == trimmedTitle + }) + + // If we don't, let's extract the title out of the original + // title string. 
+ if !match { + curTitle = origTitle[strings.LastIndex(origTitle, ":")+1:] + + // If the title is now too short, try the first colon instead: + if wordCount(curTitle) < 3 { + curTitle = origTitle[strings.Index(origTitle, ":")+1:] + // But if we have too many words before the colon there's + // something weird with the titles and the H tags so let's + // just use the original title instead + } else if wordCount(origTitle[:strings.Index(origTitle, ":")]) > 5 { + curTitle = origTitle + } + } + } else if charCount(curTitle) > 150 || charCount(curTitle) < 15 { + if hOnes := dom.GetElementsByTagName(doc, "h1"); len(hOnes) == 1 { + curTitle = ps.getInnerText(hOnes[0], true) + } + } + + curTitle = strings.TrimSpace(curTitle) + curTitle = rxNormalize.ReplaceAllString(curTitle, " ") + // If we now have 4 words or fewer as our title, and either no + // 'hierarchical' separators (\, /, > or ») were found in the original + // title or we decreased the number of words by more than 1 word, use + // the original title. + curTitleWordCount := wordCount(curTitle) + tmpOrigTitle := rxTitleAnySeparator.ReplaceAllString(origTitle, "") + + if curTitleWordCount <= 4 && + (!titleHadHierarchicalSeparators || + curTitleWordCount != wordCount(tmpOrigTitle)-1) { + curTitle = origTitle + } + + return curTitle +} + +// prepDocument prepares the HTML document for readability to scrape it. +// This includes things like stripping javascript, CSS, and handling +// terrible markup. 
+func (ps *Parser) prepDocument() { + doc := ps.doc + + // ADDITIONAL, not exist in readability.js: + // Remove all comments, + ps.removeComments(doc) + + // Remove all style tags in head + ps.removeNodes(dom.GetElementsByTagName(doc, "style"), nil) + + if nodes := dom.GetElementsByTagName(doc, "body"); len(nodes) > 0 && nodes[0] != nil { + ps.replaceBrs(nodes[0]) + } + + ps.replaceNodeTags(dom.GetElementsByTagName(doc, "font"), "span") +} + +// nextNode finds the next element, starting from the given node, and +// ignoring whitespace in between. If the given node is an element, the +// same node is returned. +func (ps *Parser) nextNode(node *html.Node) *html.Node { + next := node + for next != nil && next.Type != html.ElementNode && rxWhitespace.MatchString(dom.TextContent(next)) { + next = next.NextSibling + } + return next +} + +// replaceBrs replaces 2 or more successive
+ //
+ // Sentences
+ //
with a single. +// Whitespace between
elements are ignored. For example: +// +//foo+// +// will become: +// +//
bar
abcfoo+func (ps *Parser) replaceBrs(elem *html.Node) { + ps.forEachNode(ps.getAllNodesWithTag(elem, "br"), func(br *html.Node, _ int) { + next := br.NextSibling + + // Whether 2 or more
barabc
elements have been found and replaced + // with ablock. + replaced := false + + // If we find a
chain, remove the
s until we hit another + // element or non-whitespace. This leaves behind the first
+ // in the chain (which will be replaced with alater). + for { + next = ps.nextNode(next) + if next == nil || dom.TagName(next) != "br" { + break + } + + replaced = true + brSibling := next.NextSibling + next.Parent.RemoveChild(next) + next = brSibling + } + + // If we removed a
chain, replace the remaining
with a. Add + // all sibling nodes as children of the
until we hit another
+ // chain. + if replaced { + p := dom.CreateElement("p") + dom.ReplaceChild(br.Parent, p, br) + + next = p.NextSibling + for next != nil { + // If we've hit another
, we're done adding children to this. + if dom.TagName(next) == "br" { + nextElem := ps.nextNode(next.NextSibling) + if nextElem != nil && dom.TagName(nextElem) == "br" { + break + } + } + + if !ps.isPhrasingContent(next) { + break + } + + // Otherwise, make this node a child of the new
. + sibling := next.NextSibling + dom.AppendChild(p, next) + next = sibling + } + + for p.LastChild != nil && ps.isWhitespace(p.LastChild) { + p.RemoveChild(p.LastChild) + } + + if dom.TagName(p.Parent) == "p" { + ps.setNodeTag(p.Parent, "div") + } + } + }) +} + +// setNodeTag changes tag of the node to newTagName. +func (ps *Parser) setNodeTag(node *html.Node, newTagName string) { + if node.Type == html.ElementNode { + node.Data = newTagName + } +} + +// prepArticle prepares the article node for display. Clean out any +// inline styles, iframes, forms, strip extraneous
tags, etc. +func (ps *Parser) prepArticle(articleContent *html.Node) { + ps.cleanStyles(articleContent) + + // Check for data tables before we continue, to avoid removing + // items in those tables, which will often be isolated even + // though they're visually linked to other content-ful elements + // (text, images, etc.). + ps.markDataTables(articleContent) + + ps.fixLazyImages(articleContent) + + // Clean out junk from the article content + ps.cleanConditionally(articleContent, "form") + ps.cleanConditionally(articleContent, "fieldset") + ps.clean(articleContent, "object") + ps.clean(articleContent, "embed") + ps.clean(articleContent, "footer") + ps.clean(articleContent, "link") + ps.clean(articleContent, "aside") + + // Clean out elements have "share" in their id/class combinations + // from final top candidates, which means we don't remove the top + // candidates even they have "share". + shareElementThreshold := ps.CharThresholds + + ps.forEachNode(dom.Children(articleContent), func(topCandidate *html.Node, _ int) { + ps.cleanMatchedNodes(topCandidate, func(node *html.Node, nodeClassID string) bool { + return rxShareElements.MatchString(nodeClassID) && charCount(dom.TextContent(node)) < shareElementThreshold + }) + }) + + ps.clean(articleContent, "iframe") + ps.clean(articleContent, "input") + ps.clean(articleContent, "textarea") + ps.clean(articleContent, "select") + ps.clean(articleContent, "button") + ps.cleanHeaders(articleContent) + + // Do these last as the previous stuff may have removed junk + // that will affect these + ps.cleanConditionally(articleContent, "table") + ps.cleanConditionally(articleContent, "ul") + ps.cleanConditionally(articleContent, "div") + + // Replace H1 with H2 as H1 should be only title that is displayed separately + ps.replaceNodeTags(ps.getAllNodesWithTag(articleContent, "h1"), "h2") + + // Remove extra paragraphs + ps.removeNodes(dom.GetElementsByTagName(articleContent, "p"), func(p *html.Node) bool { + imgCount := 
len(dom.GetElementsByTagName(p, "img")) + embedCount := len(dom.GetElementsByTagName(p, "embed")) + objectCount := len(dom.GetElementsByTagName(p, "object")) + // At this point, nasty iframes have been removed, only + // remain embedded video ones. + iframeCount := len(dom.GetElementsByTagName(p, "iframe")) + totalCount := imgCount + embedCount + objectCount + iframeCount + + return totalCount == 0 && ps.getInnerText(p, false) == "" + }) + + ps.forEachNode(dom.GetElementsByTagName(articleContent, "br"), func(br *html.Node, _ int) { + next := ps.nextNode(br.NextSibling) + if next != nil && dom.TagName(next) == "p" { + br.Parent.RemoveChild(br) + } + }) + + // Remove single-cell tables + ps.forEachNode(dom.GetElementsByTagName(articleContent, "table"), func(table *html.Node, _ int) { + tbody := table + if ps.hasSingleTagInsideElement(table, "tbody") { + tbody = dom.FirstElementChild(table) + } + + if ps.hasSingleTagInsideElement(tbody, "tr") { + row := dom.FirstElementChild(tbody) + if ps.hasSingleTagInsideElement(row, "td") { + cell := dom.FirstElementChild(row) + + newTag := "div" + if ps.everyNode(dom.ChildNodes(cell), ps.isPhrasingContent) { + newTag = "p" + } + + ps.setNodeTag(cell, newTag) + dom.ReplaceChild(table.Parent, cell, table) + } + } + }) +} + +// initializeNode initializes a node with the readability score. +// Also checks the className/id for special names to add to its score. +func (ps *Parser) initializeNode(node *html.Node) { + contentScore := float64(ps.getClassWeight(node)) + switch dom.TagName(node) { + case "div": + contentScore += 5 + case "pre", "td", "blockquote": + contentScore += 3 + case "address", "ol", "ul", "dl", "dd", "dt", "li", "form": + contentScore -= 3 + case "h1", "h2", "h3", "h4", "h5", "h6", "th": + contentScore -= 5 + } + + ps.setContentScore(node, contentScore) +} + +// removeAndGetNext remove node and returns its next node. 
+func (ps *Parser) removeAndGetNext(node *html.Node) *html.Node { + nextNode := ps.getNextNode(node, true) + if node.Parent != nil { + node.Parent.RemoveChild(node) + } + return nextNode +} + +// getNextNode traverses the DOM from node to node, starting at the +// node passed in. Pass true for the second parameter to indicate +// this node itself (and its kids) are going away, and we want the +// next node over. Calling this in a loop will traverse the DOM +// depth-first. +// In Readability.js, ignoreSelfAndKids default to false. +func (ps *Parser) getNextNode(node *html.Node, ignoreSelfAndKids bool) *html.Node { + // First check for kids if those aren't being ignored + if firstChild := dom.FirstElementChild(node); !ignoreSelfAndKids && firstChild != nil { + return firstChild + } + + // Then for siblings... + if sibling := dom.NextElementSibling(node); sibling != nil { + return sibling + } + + // And finally, move up the parent chain *and* find a sibling + // (because this is depth-first traversal, we will have already + // seen the parent nodes themselves). + for { + node = node.Parent + if node == nil || dom.NextElementSibling(node) != nil { + break + } + } + + if node != nil { + return dom.NextElementSibling(node) + } + + return nil +} + +// textSimilarity compares second text to first one. 1 = same text, 0 = completely different text. +// The way it works: it splits both texts into words and then finds words that are unique in +// second text the result is given by the lower length of unique parts. +func (ps *Parser) textSimilarity(textA, textB string) float64 { + tokensA := rxTokenize.Split(strings.ToLower(textA), -1) + tokensA = strFilter(tokensA, func(s string) bool { return s != "" }) + mapTokensA := sliceToMap(tokensA...) 
+ + tokensB := rxTokenize.Split(strings.ToLower(textB), -1) + tokensB = strFilter(tokensB, func(s string) bool { return s != "" }) + uniqueTokensB := strFilter(tokensB, func(s string) bool { + _, existInA := mapTokensA[s] + return !existInA + }) + + mergedB := strings.Join(tokensB, " ") + mergedUniqueB := strings.Join(uniqueTokensB, " ") + distanceB := float64(charCount(mergedUniqueB)) / float64(charCount(mergedB)) + + return 1 - distanceB +} + +// checkByline determines if a node is used as byline. +func (ps *Parser) checkByline(node *html.Node, matchString string) bool { + if ps.articleByline != "" { + return false + } + + rel := dom.GetAttribute(node, "rel") + itemprop := dom.GetAttribute(node, "itemprop") + nodeText := dom.TextContent(node) + if (rel == "author" || strings.Contains(itemprop, "author") || rxByline.MatchString(matchString)) && + ps.isValidByline(nodeText) { + nodeText = strings.TrimSpace(nodeText) + nodeText = strings.Join(strings.Fields(nodeText), " ") + ps.articleByline = nodeText + return true + } + + return false +} + +func (ps *Parser) getTextDensity(node *html.Node, tags ...string) float64 { + textLength := charCount(ps.getInnerText(node, true)) + if textLength == 0 { + return 0 + } + + var childrenLength int + children := ps.getAllNodesWithTag(node, tags...) + ps.forEachNode(children, func(child *html.Node, _ int) { + childrenLength += charCount(ps.getInnerText(child, true)) + }) + + return float64(childrenLength) / float64(textLength) +} + +// getNodeAncestors gets the node's direct parent and grandparents. +// In Readability.js, maxDepth default to 0. 
// getNodeAncestors gets the node's direct parent and grandparents.
// A maxDepth <= 0 means "collect all ancestors up to the root".
func (ps *Parser) getNodeAncestors(node *html.Node, maxDepth int) []*html.Node {
	i := 0
	var ancestors []*html.Node

	for node.Parent != nil {
		i++
		ancestors = append(ancestors, node.Parent)
		if maxDepth > 0 && i == maxDepth {
			break
		}
		node = node.Parent
	}
	return ancestors
}

// grabArticle uses a variety of metrics (content score, classname,
// element types) to find the content that is most likely to be the
// stuff a user wants to read. Then return it wrapped up in a div.
//
// The outer for-loop is a retry mechanism: each failed pass relaxes one
// parser flag (stripUnlikelys, useWeightClasses, cleanConditionally) and
// re-runs against a fresh clone of ps.doc, recording each attempt in
// ps.attempts; the last resort returns the longest text found so far.
func (ps *Parser) grabArticle() *html.Node {
	ps.log("**** GRAB ARTICLE ****")

	for {
		// Work on a clone so each retry starts from the pristine document.
		doc := dom.Clone(ps.doc, true)

		var page *html.Node
		if nodes := dom.GetElementsByTagName(doc, "body"); len(nodes) > 0 {
			page = nodes[0]
		}

		// We can't grab an article if we don't have a page!
		if page == nil {
			ps.log("no body found in document, abort")
			return nil
		}

		// First, node prepping. Trash nodes that look cruddy (like ones
		// with the class name "comment", etc), and turn divs into P
		// tags where they have been used inappropriately (as in, where
		// they contain no other block level elements.)
		var elementsToScore []*html.Node
		var node = dom.DocumentElement(doc)
		shouldRemoveTitleHeader := true

		for node != nil {
			matchString := dom.ClassName(node) + " " + dom.ID(node)

			if dom.TagName(node) == "html" {
				ps.articleLang = dom.GetAttribute(node, "lang")
			}

			if !ps.isProbablyVisible(node) {
				ps.logf("removing hidden node: %q\n", matchString)
				node = ps.removeAndGetNext(node)
				continue
			}

			// User is not able to see elements applied with both "aria-modal = true"
			// and "role = dialog"
			if dom.GetAttribute(node, "aria-modal") == "true" &&
				dom.GetAttribute(node, "role") == "dialog" {
				node = ps.removeAndGetNext(node)
				continue
			}

			// Check to see if this node is a byline, and remove it if
			// it is true.
			if ps.checkByline(node, matchString) {
				node = ps.removeAndGetNext(node)
				continue
			}

			// Remove at most one header that merely repeats the article title.
			if shouldRemoveTitleHeader && ps.headerDuplicatesTitle(node) {
				ps.logf("removing header: %q duplicate of %q\n",
					trim(dom.TextContent(node)), trim(ps.articleTitle))
				shouldRemoveTitleHeader = false
				node = ps.removeAndGetNext(node)
				continue
			}

			// Remove unlikely candidates
			nodeTagName := dom.TagName(node)
			if ps.flags.stripUnlikelys {
				if rxUnlikelyCandidates.MatchString(matchString) &&
					!rxOkMaybeItsACandidate.MatchString(matchString) &&
					!ps.hasAncestorTag(node, "table", 3, nil) &&
					!ps.hasAncestorTag(node, "code", 3, nil) &&
					nodeTagName != "body" && nodeTagName != "a" {
					ps.logf("removing unlikely candidate: %q\n", matchString)
					node = ps.removeAndGetNext(node)
					continue
				}

				role := dom.GetAttribute(node, "role")
				if _, include := unlikelyRoles[role]; include {
					ps.logf("removing content with role %q: %q\n", role, matchString)
					node = ps.removeAndGetNext(node)
					continue
				}
			}

			// Remove DIV, SECTION, and HEADER nodes without any
			// content(e.g. text, image, video, or iframe).
			switch nodeTagName {
			case "div", "section", "header",
				"h1", "h2", "h3", "h4", "h5", "h6":
				if ps.isElementWithoutContent(node) {
					node = ps.removeAndGetNext(node)
					continue
				}
			}

			if indexOf(ps.TagsToScore, nodeTagName) != -1 {
				elementsToScore = append(elementsToScore, node)
			}

			// Turn all divs that don't have children block level
			// elements into p's
			if nodeTagName == "div" {
				// Put phrasing content into paragraphs.
				var p *html.Node
				childNode := node.FirstChild
				for childNode != nil {
					nextSibling := childNode.NextSibling
					if ps.isPhrasingContent(childNode) {
						if p != nil {
							dom.AppendChild(p, childNode)
						} else if !ps.isWhitespace(childNode) {
							p = dom.CreateElement("p")
							dom.AppendChild(p, dom.Clone(childNode, true))
							dom.ReplaceChild(node, p, childNode)
						}
					} else if p != nil {
						// Non-phrasing content ends the current paragraph;
						// trim trailing whitespace nodes from it.
						for p.LastChild != nil && ps.isWhitespace(p.LastChild) {
							p.RemoveChild(p.LastChild)
						}
						p = nil
					}
					childNode = nextSibling
				}

				// Sites like http://mobile.slate.com encloses each
				// paragraph with a DIV element. DIVs with only a P
				// element inside and no text content can be safely
				// converted into plain P elements to avoid confusing
				// the scoring algorithm with DIVs which are, in
				// practice, paragraphs.
				if ps.hasSingleTagInsideElement(node, "p") && ps.getLinkDensity(node) < 0.25 {
					newNode := dom.Children(node)[0]
					node, _ = dom.ReplaceChild(node.Parent, newNode, node)
					elementsToScore = append(elementsToScore, node)
				} else if !ps.hasChildBlockElement(node) {
					ps.setNodeTag(node, "p")
					elementsToScore = append(elementsToScore, node)
				}
			}
			node = ps.getNextNode(node, false)
		}

		// Loop through all paragraphs, and assign a score to them based
		// on how content-y they look. Then add their score to their
		// parent node. A score is determined by things like number of
		// commas, class names, etc. Maybe eventually link density.
		var candidates []*html.Node
		ps.forEachNode(elementsToScore, func(elementToScore *html.Node, _ int) {
			if elementToScore.Parent == nil || dom.TagName(elementToScore.Parent) == "" {
				return
			}

			// If this paragraph is less than 25 characters, don't even count it.
			innerText := ps.getInnerText(elementToScore, true)
			if charCount(innerText) < 25 {
				return
			}

			// Exclude nodes with no ancestor.
			// Only the nearest 5 ancestors receive score contributions.
			ancestors := ps.getNodeAncestors(elementToScore, 5)
			if len(ancestors) == 0 {
				return
			}

			// Add a point for the paragraph itself as a base.
			contentScore := 1

			// Add points for any commas within this paragraph.
			contentScore += strings.Count(innerText, ",")

			// For every 100 characters in this paragraph, add another point. Up to 3 points.
			contentScore += int(math.Min(math.Floor(float64(charCount(innerText))/100.0), 3.0))

			// Initialize and score ancestors.
			ps.forEachNode(ancestors, func(ancestor *html.Node, level int) {
				if dom.TagName(ancestor) == "" || ancestor.Parent == nil || ancestor.Parent.Type != html.ElementNode {
					return
				}

				if !ps.hasContentScore(ancestor) {
					ps.initializeNode(ancestor)
					candidates = append(candidates, ancestor)
				}

				// Node score divider:
				// - parent: 1 (no division)
				// - grandparent: 2
				// - great grandparent+: ancestor level * 3
				scoreDivider := 1
				switch level {
				case 0:
					scoreDivider = 1
				case 1:
					scoreDivider = 2
				default:
					scoreDivider = level * 3
				}

				ancestorScore := ps.getContentScore(ancestor)
				ancestorScore += float64(contentScore) / float64(scoreDivider)
				ps.setContentScore(ancestor, ancestorScore)
			})
		})

		// These lines are a bit different compared to Readability.js.
		// In Readability.js, they fetch NTopCandidates utilising array
		// method like `splice` and `pop`. In Go, array method like that
		// is not as simple, especially since we are working with pointer.
		// So, here we simply sort top candidates, and limit it to
		// max NTopCandidates.

		// Scale the final candidates score based on link density. Good
		// content should have a relatively small link density (5% or
		// less) and be mostly unaffected by this operation.
		for i := 0; i < len(candidates); i++ {
			candidate := candidates[i]
			candidateScore := ps.getContentScore(candidate) * (1 - ps.getLinkDensity(candidate))
			ps.logf("candidate %q with score: %f\n", dom.OuterHTML(candidate), candidateScore)
			ps.setContentScore(candidate, candidateScore)
		}

		// After we've calculated scores, sort through all of the possible
		// candidate nodes we found and find the one with the highest score.
		sort.Slice(candidates, func(i int, j int) bool {
			return ps.getContentScore(candidates[i]) > ps.getContentScore(candidates[j])
		})

		var topCandidates []*html.Node
		if len(candidates) > ps.NTopCandidates {
			topCandidates = candidates[:ps.NTopCandidates]
		} else {
			topCandidates = candidates
		}

		var topCandidate, parentOfTopCandidate *html.Node
		neededToCreateTopCandidate := false
		if len(topCandidates) > 0 {
			topCandidate = topCandidates[0]
		}

		// If we still have no top candidate, just use the body as a last
		// resort. We also have to copy the body node so it is something
		// we can modify.
		if topCandidate == nil || dom.TagName(topCandidate) == "body" {
			// Move all of the page's children into topCandidate
			topCandidate = dom.CreateElement("div")
			neededToCreateTopCandidate = true
			// Move everything (not just elements, also text nodes etc.)
			// into the container so we even include text directly in the body:
			for page.FirstChild != nil {
				ps.logf("moving child out: %q\n", dom.OuterHTML(page.FirstChild))
				dom.AppendChild(topCandidate, page.FirstChild)
			}

			dom.AppendChild(page, topCandidate)
			ps.initializeNode(topCandidate)
		} else if topCandidate != nil {
			// Find a better top candidate node if it contains (at least three)
			// nodes which belong to `topCandidates` array and whose scores are
			// quite closed with current `topCandidate` node.
			topCandidateScore := ps.getContentScore(topCandidate)
			var alternativeCandidateAncestors [][]*html.Node
			for i := 1; i < len(topCandidates); i++ {
				if ps.getContentScore(topCandidates[i])/topCandidateScore >= 0.75 {
					topCandidateAncestors := ps.getNodeAncestors(topCandidates[i], 0)
					alternativeCandidateAncestors = append(alternativeCandidateAncestors, topCandidateAncestors)
				}
			}

			minimumTopCandidates := 3
			if len(alternativeCandidateAncestors) >= minimumTopCandidates {
				parentOfTopCandidate = topCandidate.Parent
				for parentOfTopCandidate != nil && dom.TagName(parentOfTopCandidate) != "body" {
					listContainingThisAncestor := 0
					for ancestorIndex := 0; ancestorIndex < len(alternativeCandidateAncestors) && listContainingThisAncestor < minimumTopCandidates; ancestorIndex++ {
						if dom.IncludeNode(alternativeCandidateAncestors[ancestorIndex], parentOfTopCandidate) {
							listContainingThisAncestor++
						}
					}

					if listContainingThisAncestor >= minimumTopCandidates {
						topCandidate = parentOfTopCandidate
						break
					}

					parentOfTopCandidate = parentOfTopCandidate.Parent
				}
			}

			if !ps.hasContentScore(topCandidate) {
				ps.initializeNode(topCandidate)
			}

			// Because of our bonus system, parents of candidates might
			// have scores themselves. They get half of the node. There
			// won't be nodes with higher scores than our topCandidate,
			// but if we see the score going *up* in the first few steps
			// up the tree, that's a decent sign that there might be more
			// content lurking in other places that we want to unify in.
			// The sibling stuff below does some of that - but only if
			// we've looked high enough up the DOM tree.
			parentOfTopCandidate = topCandidate.Parent
			lastScore := ps.getContentScore(topCandidate)
			// The scores shouldn't get too low.
			scoreThreshold := lastScore / 3.0
			for parentOfTopCandidate != nil && dom.TagName(parentOfTopCandidate) != "body" {
				if !ps.hasContentScore(parentOfTopCandidate) {
					parentOfTopCandidate = parentOfTopCandidate.Parent
					continue
				}

				parentScore := ps.getContentScore(parentOfTopCandidate)
				if parentScore < scoreThreshold {
					break
				}

				if parentScore > lastScore {
					// Alright! We found a better parent to use.
					topCandidate = parentOfTopCandidate
					break
				}

				lastScore = parentScore
				parentOfTopCandidate = parentOfTopCandidate.Parent
			}

			// If the top candidate is the only child, use parent
			// instead. This will help sibling joining logic when
			// adjacent content is actually located in parent's
			// sibling node.
			parentOfTopCandidate = topCandidate.Parent
			for parentOfTopCandidate != nil && dom.TagName(parentOfTopCandidate) != "body" && len(dom.Children(parentOfTopCandidate)) == 1 {
				topCandidate = parentOfTopCandidate
				parentOfTopCandidate = topCandidate.Parent
			}

			if !ps.hasContentScore(topCandidate) {
				ps.initializeNode(topCandidate)
			}
		}

		// Now that we have the top candidate, look through its siblings
		// for content that might also be related. Things like preambles,
		// content split by ads that we removed, etc.
		articleContent := dom.CreateElement("div")
		siblingScoreThreshold := math.Max(10, ps.getContentScore(topCandidate)*0.2)

		// Keep potential top candidate's parent node to try to get text direction of it later.
		topCandidateScore := ps.getContentScore(topCandidate)
		topCandidateClassName := dom.ClassName(topCandidate)

		parentOfTopCandidate = topCandidate.Parent
		siblings := dom.Children(parentOfTopCandidate)
		for s := 0; s < len(siblings); s++ {
			sibling := siblings[s]
			appendNode := false

			if sibling == topCandidate {
				appendNode = true
			} else {
				contentBonus := float64(0)

				// Give a bonus if sibling nodes and top candidates have the exact same classname
				if dom.ClassName(sibling) == topCandidateClassName && topCandidateClassName != "" {
					contentBonus += topCandidateScore * 0.2
				}

				if ps.hasContentScore(sibling) && ps.getContentScore(sibling)+contentBonus >= siblingScoreThreshold {
					appendNode = true
				} else if dom.TagName(sibling) == "p" {
					linkDensity := ps.getLinkDensity(sibling)
					nodeContent := ps.getInnerText(sibling, true)
					nodeLength := charCount(nodeContent)

					if nodeLength > 80 && linkDensity < 0.25 {
						appendNode = true
					} else if nodeLength < 80 && nodeLength > 0 && linkDensity == 0 &&
						rxSentencePeriod.MatchString(nodeContent) {
						appendNode = true
					}
				}
			}

			if appendNode {
				// We have a node that isn't a common block level
				// element, like a form or td tag. Turn it into a div
				// so it doesn't get filtered out later by accident.
				if indexOf(alterToDivExceptions, dom.TagName(sibling)) == -1 {
					ps.setNodeTag(sibling, "div")
				}

				dom.AppendChild(articleContent, sibling)

				// TODO:
				// this line is implemented in Readability.js, however
				// it doesn't seem to be useful for our port.
				// siblings = dom.Children(parentOfTopCandidate)
			}
		}

		// So we have all of the content that we need. Now we clean
		// it up for presentation.
		ps.prepArticle(articleContent)

		if neededToCreateTopCandidate {
			// We already created a fake div thing, and there wouldn't
			// have been any siblings left for the previous loop, so
			// there's no point trying to create a new div, and then
			// move all the children over. Just assign IDs and class
			// names here. No need to append because that already
			// happened anyway.
			//
			// By the way, this line is different with Readability.js.
			// In Readability.js, when using `appendChild`, the node is
			// still referenced. Meanwhile here, our `appendChild` will
			// clone the node, put it in the new place, then delete
			// the original.
			firstChild := dom.FirstElementChild(articleContent)
			if firstChild != nil && dom.TagName(firstChild) == "div" {
				dom.SetAttribute(firstChild, "id", "readability-page-1")
				dom.SetAttribute(firstChild, "class", "page")
			}
		} else {
			div := dom.CreateElement("div")
			dom.SetAttribute(div, "id", "readability-page-1")
			dom.SetAttribute(div, "class", "page")
			for articleContent.FirstChild != nil {
				dom.AppendChild(div, articleContent.FirstChild)
			}
			dom.AppendChild(articleContent, div)
		}

		parseSuccessful := true

		// Now that we've gone through the full algorithm, check to
		// see if we got any meaningful content. If we didn't, we may
		// need to re-run grabArticle with different flags set. This
		// gives us a higher likelihood of finding the content, and
		// the sieve approach gives us a higher likelihood of
		// finding the -right- content.
		textLength := charCount(ps.getInnerText(articleContent, true))
		if textLength < ps.CharThresholds {
			parseSuccessful = false

			// Relax one flag per failed attempt, recording the attempt so
			// the best result can be recovered if every pass fails.
			if ps.flags.stripUnlikelys {
				ps.flags.stripUnlikelys = false
				ps.attempts = append(ps.attempts, parseAttempt{
					articleContent: articleContent,
					textLength:     textLength,
				})
			} else if ps.flags.useWeightClasses {
				ps.flags.useWeightClasses = false
				ps.attempts = append(ps.attempts, parseAttempt{
					articleContent: articleContent,
					textLength:     textLength,
				})
			} else if ps.flags.cleanConditionally {
				ps.flags.cleanConditionally = false
				ps.attempts = append(ps.attempts, parseAttempt{
					articleContent: articleContent,
					textLength:     textLength,
				})
			} else {
				ps.attempts = append(ps.attempts, parseAttempt{
					articleContent: articleContent,
					textLength:     textLength,
				})

				// No luck after removing flags, just return the
				// longest text we found during the different loops.
				sort.Slice(ps.attempts, func(i, j int) bool {
					return ps.attempts[i].textLength > ps.attempts[j].textLength
				})

				// But first check if we actually have something
				if ps.attempts[0].textLength == 0 {
					return nil
				}

				articleContent = ps.attempts[0].articleContent
				parseSuccessful = true
			}
		}

		if parseSuccessful {
			return articleContent
		}
	}
}

// isValidByline checks whether the input string could be a byline.
// This verifies that the input is a string, and that the length
// is less than 100 chars.
func (ps *Parser) isValidByline(byline string) bool {
	byline = strings.TrimSpace(byline)
	nChar := charCount(byline)
	return nChar > 0 && nChar < 100
}

// getJSONLD try to extract metadata from JSON-LD object.
// For now, only Schema.org objects of type Article or its subtypes are supported.
// getJSONLD try to extract metadata from JSON-LD object.
// For now, only Schema.org objects of type Article or its subtypes are
// supported. The returned map may contain the keys "title", "byline",
// "excerpt" and "siteName"; it is nil when no suitable JSON-LD is found.
// Only the first matching <script> element is used.
func (ps *Parser) getJSONLD() (map[string]string, error) {
	var metadata map[string]string

	scripts := dom.QuerySelectorAll(ps.doc, `script[type="application/ld+json"]`)
	ps.forEachNode(scripts, func(jsonLdElement *html.Node, _ int) {
		// Stop processing once metadata has been extracted from an
		// earlier script element.
		if metadata != nil {
			return
		}

		// Strip CDATA markers if present
		content := rxCDATA.ReplaceAllString(dom.TextContent(jsonLdElement), "")

		// Decode JSON
		var parsed map[string]interface{}
		err := json.Unmarshal([]byte(content), &parsed)
		if err != nil {
			ps.logf("error while decoding json: %v", err)
			return
		}

		// Check context
		strContext, isString := parsed["@context"].(string)
		if !isString || !rxSchemaOrg.MatchString(strContext) {
			return
		}

		// If parsed doesn't have any @type, find it in its graph list
		if _, typeExist := parsed["@type"]; !typeExist {
			graphList, isArray := parsed["@graph"].([]interface{})
			if !isArray {
				return
			}

			for _, graph := range graphList {
				objGraph, isObj := graph.(map[string]interface{})
				if !isObj {
					continue
				}

				strType, isString := objGraph["@type"].(string)
				if isString && rxJsonLdArticleTypes.MatchString(strType) {
					parsed = objGraph
					break
				}
			}
		}

		// Once again, make sure parsed has valid @type
		strType, isString := parsed["@type"].(string)
		if !isString || !rxJsonLdArticleTypes.MatchString(strType) {
			return
		}

		// Initiate metadata
		metadata = make(map[string]string)

		// Title
		name, nameIsString := parsed["name"].(string)
		headline, headlineIsString := parsed["headline"].(string)

		if nameIsString && headlineIsString && name != headline {
			// We have both name and headline element in the JSON-LD. They should both be the same
			// but some websites like aktualne.cz put their own name into "name" and the article
			// title to "headline" which confuses Readability. So we try to check if either "name"
			// or "headline" closely matches the html title, and if so, use that one. If not, then
			// we use "name" by default.
			title := ps.getArticleTitle()
			nameMatches := ps.textSimilarity(name, title) > 0.75
			headlineMatches := ps.textSimilarity(headline, title) > 0.75

			if headlineMatches && !nameMatches {
				metadata["title"] = headline
			} else {
				metadata["title"] = name
			}
		} else if name, isString := parsed["name"].(string); isString {
			metadata["title"] = strings.TrimSpace(name)
		} else if headline, isString := parsed["headline"].(string); isString {
			metadata["title"] = strings.TrimSpace(headline)
		}

		// Author: either a single Person object or a list of them.
		switch val := parsed["author"].(type) {
		case map[string]interface{}:
			if name, isString := val["name"].(string); isString {
				metadata["byline"] = strings.TrimSpace(name)
			}

		case []interface{}:
			var authors []string
			for _, author := range val {
				objAuthor, isObj := author.(map[string]interface{})
				if !isObj {
					continue
				}

				if name, isString := objAuthor["name"].(string); isString {
					authors = append(authors, strings.TrimSpace(name))
				}
			}
			metadata["byline"] = strings.Join(authors, ", ")
		}

		// Description
		if description, isString := parsed["description"].(string); isString {
			metadata["excerpt"] = strings.TrimSpace(description)
		}

		// Publisher
		if objPublisher, isObj := parsed["publisher"].(map[string]interface{}); isObj {
			if name, isString := objPublisher["name"].(string); isString {
				metadata["siteName"] = strings.TrimSpace(name)
			}
		}
	})

	return metadata, nil
}

// getArticleMetadata attempts to get excerpt and byline
// metadata for the article. Values from jsonLd (see getJSONLD) take
// priority over <meta> tags; the HTML <title> is the final fallback
// for the article title.
func (ps *Parser) getArticleMetadata(jsonLd map[string]string) map[string]string {
	values := make(map[string]string)
	metaElements := dom.GetElementsByTagName(ps.doc, "meta")

	// Find description tags.
	ps.forEachNode(metaElements, func(element *html.Node, _ int) {
		elementName := dom.GetAttribute(element, "name")
		elementProperty := dom.GetAttribute(element, "property")
		content := dom.GetAttribute(element, "content")
		if content == "" {
			return
		}
		matches := []string{}
		name := ""

		if elementProperty != "" {
			matches = rxPropertyPattern.FindAllString(elementProperty, -1)
			for i := len(matches) - 1; i >= 0; i-- {
				// Convert to lowercase, and remove any whitespace
				// so we can match below.
				name = strings.ToLower(matches[i])
				name = strings.Join(strings.Fields(name), "")
				// multiple authors
				values[name] = strings.TrimSpace(content)
			}
		}

		if len(matches) == 0 && elementName != "" && rxNamePattern.MatchString(elementName) {
			// Convert to lowercase, remove any whitespace, and convert
			// dots to colons so we can match below.
			name = strings.ToLower(elementName)
			name = strings.Join(strings.Fields(name), "")
			name = strings.Replace(name, ".", ":", -1)
			values[name] = strings.TrimSpace(content)
		}
	})

	// get title
	metadataTitle := strOr(
		jsonLd["title"],
		values["dc:title"],
		values["dcterm:title"],
		values["og:title"],
		values["weibo:article:title"],
		values["weibo:webpage:title"],
		values["title"],
		values["twitter:title"])

	if metadataTitle == "" {
		metadataTitle = ps.getArticleTitle()
	}

	// get author
	metadataByline := strOr(
		jsonLd["byline"],
		values["dc:creator"],
		values["dcterm:creator"],
		values["author"])

	// get description
	metadataExcerpt := strOr(
		jsonLd["excerpt"],
		values["dc:description"],
		values["dcterm:description"],
		values["og:description"],
		values["weibo:article:description"],
		values["weibo:webpage:description"],
		values["description"],
		values["twitter:description"])

	// get site name
	metadataSiteName := strOr(jsonLd["siteName"], values["og:site_name"])

	// get image thumbnail
	metadataImage := strOr(
		values["og:image"],
		values["image"],
		values["twitter:image"])

	// get favicon
	metadataFavicon := ps.getArticleFavicon()

	// in many sites the meta value is escaped with HTML entities,
	// so here we need to unescape it
	metadataTitle = shtml.UnescapeString(metadataTitle)
	metadataByline = shtml.UnescapeString(metadataByline)
	metadataExcerpt = shtml.UnescapeString(metadataExcerpt)
	metadataSiteName = shtml.UnescapeString(metadataSiteName)

	return map[string]string{
		"title":    metadataTitle,
		"byline":   metadataByline,
		"excerpt":  metadataExcerpt,
		"siteName": metadataSiteName,
		"image":    metadataImage,
		"favicon":  metadataFavicon,
	}
}

// isSingleImage checks if node is image, or if node contains exactly
// only one image whether as a direct child or as its descendants.
// Any non-whitespace text content disqualifies the node.
func (ps *Parser) isSingleImage(node *html.Node) bool {
	if dom.TagName(node) == "img" {
		return true
	}

	// Recurse through chains of single-child wrappers around the image.
	children := dom.Children(node)
	textContent := dom.TextContent(node)
	if len(children) != 1 || strings.TrimSpace(textContent) != "" {
		return false
	}

	return ps.isSingleImage(children[0])
}

// unwrapNoscriptImages finds all