-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fetch and use Botkube website and docs for AI assistant (#25)
- Fetch Botkube website and docs for AI assistant - Use Botkube website and docs for AI assistant - Add Readme for assistant setup - Migrate assistant setup to TypeScript - we need to use beta functionality (vertex store, file search) which isn't available in the unofficial Go client.
- Loading branch information
Showing
258 changed files
with
18,762 additions
and
301 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,3 +17,5 @@ | |
dist/ | ||
plugins-index.yaml | ||
/.idea/ | ||
|
||
node_modules |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# OpenAI Assistant setup | ||
|
||
The tool configures the OpenAI assistant for the Botkube AI plugin. It uses documents from the `assets` directory for file search capabilities. | ||
|
||
## Toolchain | ||
|
||
This project uses [Volta](https://github.com/volta-cli/volta) to manage JS tools. Volta automatically downloads and installs the right Node.js version when you run any of the `node` or `npm` commands. | ||
|
||
It is recommended to install it before running the script, to ensure the right Node.js version is used. | ||
|
||
## Usage | ||
|
||
Navigate to the directory `hack/assistant-setup` and execute one of the following commands. | ||
|
||
### Install dependencies | ||
|
||
To install all dependencies, run: | ||
|
||
```sh | ||
npm install | ||
``` | ||
|
||
### Start app | ||
|
||
```sh | ||
export OPENAI_API_KEY=... # your OpenAI API key | ||
export ASSISTANT_ENV=dev # dev or prod | ||
npm run start | ||
``` | ||
|
||
To use your own assistant, modify the `assistantID` variable in the `index.ts` file. | ||
|
||
## Development | ||
|
||
## Refetch content for file search | ||
|
||
> **NOTE:** The process uses [Jina.AI Reader API](https://github.com/jina-ai/reader) and usually takes 10-15 minutes. All files will be removed before the process starts. | ||
To scrape the content from the latest Botkube website and Botkube Docs, run the following command: | ||
|
||
```sh | ||
npm run fetch-content | ||
``` | ||
|
||
## Format code | ||
|
||
To format code, run: | ||
|
||
```sh | ||
npm run format | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
package main | ||
|
||
import ( | ||
"encoding/xml" | ||
"fmt" | ||
"io" | ||
"net/http" | ||
"net/url" | ||
"os" | ||
"regexp" | ||
"strings" | ||
"time" | ||
|
||
"github.com/avast/retry-go/v4" | ||
|
||
"github.com/kubeshop/botkube/pkg/config" | ||
"github.com/kubeshop/botkube/pkg/loggerx" | ||
"github.com/kubeshop/botkube/pkg/multierror" | ||
"github.com/sirupsen/logrus" | ||
) | ||
|
||
const (
	// marketingSitemapURL lists every page of the Botkube marketing website.
	marketingSitemapURL = "https://botkube.io/sitemap.xml"
	// docsSitemapURL lists every page of the Botkube documentation site.
	docsSitemapURL = "https://docs.botkube.io/sitemap.xml"
	// processingAPIURL is the Jina.AI Reader API endpoint; prefixing a page
	// URL with it returns that page's content converted for LLM consumption.
	processingAPIURL = "https://r.jina.ai"
	// contentDir is the local output directory for fetched pages.
	contentDir = "content"

	// Retry/timeout tuning for page fetches.
	maxRetries     = 5
	retryInterval  = 1 * time.Second
	httpCliTimeout = 1 * time.Minute
)

// excludedDocsPagesRegex matches docs pages that belong to previous
// ("1.2/...") or upcoming ("next/...") versions; such pages are skipped
// when the page list is prepared.
var excludedDocsPagesRegex = regexp.MustCompile(`^https:\/\/docs\.botkube\.io\/(?:\d+\.\d+|next)\/.*`)
|
||
func main() { | ||
log := loggerx.New(config.Logger{ | ||
Level: "info", | ||
Formatter: "text", | ||
}) | ||
|
||
fetcher := &contentFetcher{ | ||
log: log, | ||
httpCli: &http.Client{ | ||
Timeout: httpCliTimeout, | ||
}, | ||
} | ||
|
||
log.Infof("Removing old %q directory...", contentDir) | ||
err := os.RemoveAll(contentDir) | ||
loggerx.ExitOnError(err, "while removing old directory") | ||
|
||
log.Info("Fetching Botkube sitemap...") | ||
marketingPages, err := fetcher.getURLsToDownloadFromSitemap(marketingSitemapURL) | ||
loggerx.ExitOnError(err, "while fetching Botkube sitemap") | ||
|
||
log.Info("Fetching Botkube docs sitemap...") | ||
docsPages, err := fetcher.getURLsToDownloadFromSitemap(docsSitemapURL) | ||
loggerx.ExitOnError(err, "while fetching Botkube docs sitemap") | ||
|
||
log.Info("Preparing list of pages to fetch...") | ||
pagesToFetch := fetcher.preparePageList(docsPages, marketingPages) | ||
log.Infof("Found %d pages to fetch", len(pagesToFetch)) | ||
|
||
log.Infof("Creating %q directory...", contentDir) | ||
err = os.MkdirAll(contentDir, os.ModePerm) | ||
loggerx.ExitOnError(err, "while creating directory") | ||
|
||
errs := multierror.New() | ||
for i, page := range pagesToFetch { | ||
filePath, err := fetcher.filePathForURL(page) | ||
if err != nil { | ||
errs = multierror.Append(errs, err) | ||
continue | ||
} | ||
log.WithFields(logrus.Fields{ | ||
"url": page, | ||
"filePath": filePath, | ||
}).Infof("Fetching and saving page %d of %d...", i+1, len(pagesToFetch)) | ||
|
||
err = retry.Do( | ||
func() error { | ||
return fetcher.fetchAndSavePage(page, filePath) | ||
}, | ||
retry.Attempts(maxRetries), | ||
retry.OnRetry(func(n uint, err error) { | ||
log.WithError(err).Errorf("while fetching and saving page %q. Retrying...", page) | ||
}), | ||
retry.Delay(retryInterval), | ||
) | ||
|
||
if err != nil { | ||
errs = multierror.Append(errs, err) | ||
} | ||
} | ||
|
||
loggerx.ExitOnError(errs.ErrorOrNil(), "while fetching and saving docs pages") | ||
|
||
log.Infof("Saved %d docs pages", len(pagesToFetch)) | ||
} | ||
|
||
// contentFetcher downloads sitemaps and page content using a shared,
// timeout-configured HTTP client.
type contentFetcher struct {
	log     logrus.FieldLogger
	httpCli *http.Client
}

// sitemapURLSet mirrors the root <urlset> element of a sitemap XML document.
type sitemapURLSet struct {
	URLs []sitemapURL `xml:"url"`
}

// sitemapURL mirrors a single <url> sitemap entry; Loc holds the page URL.
type sitemapURL struct {
	Loc string `xml:"loc"`
}
|
||
func (f *contentFetcher) getURLsToDownloadFromSitemap(sitemapURL string) ([]string, error) { | ||
log := f.log.WithField("sitemapURL", sitemapURL) | ||
// nolint:gosec | ||
res, err := http.Get(sitemapURL) | ||
if err != nil { | ||
return nil, fmt.Errorf("while fetching sitemap %q: %w", sitemapURL, err) | ||
} | ||
defer res.Body.Close() | ||
|
||
if res.StatusCode != http.StatusOK { | ||
return nil, fmt.Errorf("invalid status code when fetching Botkube sitemap: %d", res.StatusCode) | ||
} | ||
|
||
log.Info("Decoding sitemap...") | ||
var sitemap sitemapURLSet | ||
err = xml.NewDecoder(res.Body).Decode(&sitemap) | ||
if err != nil { | ||
return nil, fmt.Errorf("while decoding sitemap %q: %w", sitemapURL, err) | ||
} | ||
|
||
var urls []string | ||
for _, part := range sitemap.URLs { | ||
urls = append(urls, part.Loc) | ||
} | ||
|
||
log.Infof("Found %d sitemap entries", len(urls)) | ||
return urls, nil | ||
} | ||
|
||
func (f *contentFetcher) fetchAndSavePage(inURL, filePath string) error { | ||
pageURL := fmt.Sprintf("%s/%s", processingAPIURL, inURL) | ||
|
||
req, err := http.NewRequest(http.MethodGet, pageURL, nil) | ||
if err != nil { | ||
return fmt.Errorf("while creating request for page %q: %w", pageURL, err) | ||
} | ||
req.Header.Set("Content-Type", "text/event-stream") | ||
|
||
res, err := f.httpCli.Do(req) | ||
if err != nil { | ||
return fmt.Errorf("while fetching page %q: %w", pageURL, err) | ||
} | ||
defer res.Body.Close() | ||
|
||
if res.StatusCode != http.StatusOK { | ||
return fmt.Errorf("invalid status code when fetching page %q: %d", pageURL, res.StatusCode) | ||
} | ||
|
||
// nolint:gosec | ||
file, err := os.Create(filePath) | ||
if err != nil { | ||
return fmt.Errorf("while creating file %q: %w", filePath, err) | ||
} | ||
defer file.Close() | ||
|
||
_, err = io.Copy(file, res.Body) | ||
if err != nil { | ||
return fmt.Errorf("while writing to file %q: %w", filePath, err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (f *contentFetcher) preparePageList(docsPages, marketingPages []string) []string { | ||
var out []string | ||
for _, page := range docsPages { | ||
// remove all docs for previous and upcoming versions | ||
if excludedDocsPagesRegex.MatchString(page) { | ||
continue | ||
} | ||
|
||
out = append(out, strings.TrimSpace(page)) | ||
} | ||
for _, page := range marketingPages { | ||
out = append(out, strings.TrimSpace(page)) | ||
} | ||
|
||
return out | ||
} | ||
|
||
func (f *contentFetcher) filePathForURL(inURL string) (string, error) { | ||
parsedInURL, err := url.Parse(inURL) | ||
if err != nil { | ||
return "", fmt.Errorf("while parsing url %q: %w", inURL, err) | ||
} | ||
|
||
prefix := parsedInURL.Host | ||
urlPath := strings.Trim(parsedInURL.Path, "/") | ||
urlPath = strings.Replace(urlPath, "/", "__", -1) | ||
|
||
fileName := prefix | ||
if urlPath != "" { | ||
fileName = fmt.Sprintf("%s__%s", prefix, urlPath) | ||
} | ||
|
||
return fmt.Sprintf("%s/%s.md", contentDir, fileName), nil | ||
} |
Oops, something went wrong.