diff --git a/packages/scraper/src/externals/request/request.ts b/packages/scraper/src/externals/request/request.ts index 94815127..db56be6c 100644 --- a/packages/scraper/src/externals/request/request.ts +++ b/packages/scraper/src/externals/request/request.ts @@ -14,6 +14,7 @@ import puppeteer from "puppeteer-extra"; import { HTTPRequest, HTTPResponse, Protocol, Browser, Page } from "puppeteer"; import puppeteerStealthPlugin from "puppeteer-extra-plugin-stealth"; import { getStoreValue, StoreKey } from "enterprise-core/dist/asyncStorage"; +import { channel } from "diagnostics_channel"; puppeteer.use(puppeteerStealthPlugin()); function transformAxiosResponse(response: AxiosResponse): Response { @@ -41,6 +42,8 @@ function transformAxiosResponse(response: AxiosResponse): Response { }; } +const puppeteerChannel = channel("enterprise-puppeteer"); + // remove pages after 10 minutes of inactivity const pages = new Cache>({ stdTTL: 60 * 10, useClones: false, size: 20 }); const pageUsed = new Set(); @@ -50,9 +53,14 @@ pages.on("expired", (_key, value: Promise) => { if (!page.isClosed()) { page.close(); } + puppeteerChannel.publish({ browser: browserGetter.isActive(), pages: pages.stats.keys }); }); }); +pages.on("set", () => { + puppeteerChannel.publish({ browser: browserGetter.isActive(), pages: pages.stats.keys }); +}); + // Modified from https://www.bannerbear.com/blog/ways-to-speed-up-puppeteer-screenshots/ const minimalArgs = [ "--autoplay-policy=user-gesture-required", @@ -115,6 +123,7 @@ class BrowserGetter { const browserPromise = this.#puppeteerBrowser; this.#timeoutId = undefined; this.#puppeteerBrowser = undefined; + pages.flushAll(); browserPromise?.then((browser) => browser.close()).catch(logger.error); }, 1000 * 60 * 15); @@ -126,7 +135,12 @@ class BrowserGetter { }); return this.#puppeteerBrowser; } + + public isActive() { + return !!this.#puppeteerBrowser; + } } + const browserGetter = new BrowserGetter(); /** diff --git a/packages/scraper/src/externals/types.ts b/packages/scraper/src/externals/types.ts index caa7c005..6674aa10 100644 --- a/packages/scraper/src/externals/types.ts +++ b/packages/scraper/src/externals/types.ts @@ -16,11 +16,6 @@ import { import { MediaType } from "enterprise-core/dist/tools"; import { ListScrapeResult } from "./listManager"; -/** - * Channels accessible in Scraper. - */ -export type ScraperChannel = "enterprise-jobqueue" | "enterprise-jobs" | "enterprise-requestqueue"; - export interface BasicChannelMessage { messageType: string; } @@ -53,6 +48,11 @@ export interface JobQueueChannelMessage extends BasicChannelMessage { max: number; } +export interface PuppeteerMessage { + browser: boolean; + pages: number; +} + /** * Type diagnostics_channel module more restrictively. */ @@ -61,6 +61,7 @@ declare module "diagnostics_channel" { "enterprise-jobqueue": JobQueueChannelMessage; "enterprise-jobs": JobChannelMessage; "enterprise-requestqueue": RequestQueueChannelMessage; + "enterprise-puppeteer": PuppeteerMessage; } type ChannelNames = keyof ScraperMapping; diff --git a/packages/scraper/src/metrics.ts b/packages/scraper/src/metrics.ts index 4c85919e..19e7f157 100644 --- a/packages/scraper/src/metrics.ts +++ b/packages/scraper/src/metrics.ts @@ -83,12 +83,27 @@ const jobDuration = new Histogram({ labelNames: ["jobType", "hook"], }); +const puppeteerActive = new Gauge({ + name: "scraper_job_puppeteer_active", + help: "Puppeteer active or not", +}); + +const puppeteerPages = new Gauge({ + name: "scraper_job_puppeteer_pages", + help: "Number of open Puppeteer Pages", +}); + subscribe("enterprise-jobqueue", (message) => { jobMaxCount.set(message.max); jobQueueCount.set(message.queued); jobActiveCount.set(message.active); }); +subscribe("enterprise-puppeteer", (message) => { + puppeteerActive.set(message.browser ? 1 : 0); + puppeteerPages.set(message.pages); +}); + subscribe("enterprise-jobs", (message) => { if (message.type === "finished") { // TODO: currently silently ignore multiple used hooks, only a single one should be used anyway diff --git a/packages/scraper/src/websocket.ts b/packages/scraper/src/websocket.ts index 425bddca..3abb97c1 100644 --- a/packages/scraper/src/websocket.ts +++ b/packages/scraper/src/websocket.ts @@ -2,7 +2,7 @@ import Websocket from "ws"; import logger from "enterprise-core/dist/logger"; import { remove } from "enterprise-core/dist/tools"; import { ChannelNames, subscribe, unsubscribe } from "diagnostics_channel"; -import { ScraperChannel, WSRequest } from "./externals/types"; +import { WSRequest } from "./externals/types"; import { DefaultJobScraper } from "./scheduler/jobScheduler"; import { publishQueues } from "./externals/queueRequest"; import { registerOnExitHandler } from "enterprise-core/dist/exit"; @@ -64,7 +64,7 @@ ws.on("connection", (socket) => { class SocketChannelListener { private readonly listenerSockets: Websocket[] = []; - private readonly channel: ScraperChannel; + private readonly channel: ChannelNames; public constructor(channel: ChannelNames) { this.channel = channel;