Skip to content

Commit

Permalink
feat(scraper): add more puppeteer metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
mytlogos committed Sep 9, 2022
1 parent 2bb8866 commit eaceec2
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 7 deletions.
14 changes: 14 additions & 0 deletions packages/scraper/src/externals/request/request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import puppeteer from "puppeteer-extra";
import { HTTPRequest, HTTPResponse, Protocol, Browser, Page } from "puppeteer";
import puppeteerStealthPlugin from "puppeteer-extra-plugin-stealth";
import { getStoreValue, StoreKey } from "enterprise-core/dist/asyncStorage";
import { channel } from "diagnostics_channel";
puppeteer.use(puppeteerStealthPlugin());

function transformAxiosResponse(response: AxiosResponse): Response {
Expand Down Expand Up @@ -41,6 +42,8 @@ function transformAxiosResponse(response: AxiosResponse): Response {
};
}

// Diagnostics channel on which this module publishes its browser/page state.
const puppeteerChannel = channel("enterprise-puppeteer");

// remove pages after 10 minutes of inactivity
// Values are stored as Promise<Page> under string keys.
// NOTE(review): `useClones: false` presumably makes the cache hand back the stored
// promise itself rather than a copy, and `size: 20` presumably caps the number of
// entries — confirm against the Cache implementation.
const pages = new Cache<string, Promise<Page>>({ stdTTL: 60 * 10, useClones: false, size: 20 });
// Set of string keys kept next to the cache — presumably the pages currently in use;
// TODO confirm against the code that adds to and removes from this set.
const pageUsed = new Set<string>();
Expand All @@ -50,9 +53,14 @@ pages.on("expired", (_key, value: Promise<Page>) => {
if (!page.isClosed()) {
page.close();
}
puppeteerChannel.publish({ browser: browserGetter.isActive(), pages: pages.stats.keys });
});
});

// Publish a fresh state snapshot (browser active flag + number of cached page keys)
// every time the page cache emits "set".
// NOTE(review): in the visible code only the "set" and "expired" events publish;
// confirm whether pages.flushAll() / manual deletions should publish as well so
// subscribers do not keep a stale page count.
pages.on("set", () => {
  const browser = browserGetter.isActive();
  const cachedPages = pages.stats.keys;

  puppeteerChannel.publish({ browser, pages: cachedPages });
});

// Modified from https://www.bannerbear.com/blog/ways-to-speed-up-puppeteer-screenshots/
const minimalArgs = [
"--autoplay-policy=user-gesture-required",
Expand Down Expand Up @@ -115,6 +123,7 @@ class BrowserGetter {
const browserPromise = this.#puppeteerBrowser;
this.#timeoutId = undefined;
this.#puppeteerBrowser = undefined;
pages.flushAll();

browserPromise?.then((browser) => browser.close()).catch(logger.error);
}, 1000 * 60 * 15);
Expand All @@ -126,7 +135,12 @@ class BrowserGetter {
});
return this.#puppeteerBrowser;
}

/**
 * Reports whether this getter currently holds a browser.
 *
 * @returns `true` while `#puppeteerBrowser` holds a truthy value, `false` otherwise
 */
public isActive(): boolean {
  return Boolean(this.#puppeteerBrowser);
}
}

// Module-level BrowserGetter instance; the page-cache event handlers query it
// through isActive() when publishing to the puppeteer channel.
const browserGetter = new BrowserGetter();

/**
Expand Down
11 changes: 6 additions & 5 deletions packages/scraper/src/externals/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ import {
import { MediaType } from "enterprise-core/dist/tools";
import { ListScrapeResult } from "./listManager";

/**
* Channels accessible in Scraper.
*/
export type ScraperChannel = "enterprise-jobqueue" | "enterprise-jobs" | "enterprise-requestqueue";

/**
 * Base shape for channel messages: every interface extending it carries a
 * string `messageType`.
 */
export interface BasicChannelMessage {
/** String tag of the message (the set of allowed values is not defined here). */
messageType: string;
}
Expand Down Expand Up @@ -53,6 +48,11 @@ export interface JobQueueChannelMessage extends BasicChannelMessage {
max: number;
}

/**
 * Payload of the "enterprise-puppeteer" diagnostics channel.
 */
export interface PuppeteerMessage {
/** Whether a browser is currently held; publishers pass `browserGetter.isActive()`. */
browser: boolean;
/** Number of keys in the page cache at publish time; publishers pass `pages.stats.keys`. */
pages: number;
}

/**
* Type diagnostics_channel module more restrictively.
*/
Expand All @@ -61,6 +61,7 @@ declare module "diagnostics_channel" {
"enterprise-jobqueue": JobQueueChannelMessage;
"enterprise-jobs": JobChannelMessage;
"enterprise-requestqueue": RequestQueueChannelMessage;
"enterprise-puppeteer": PuppeteerMessage;
}

type ChannelNames = keyof ScraperMapping;
Expand Down
15 changes: 15 additions & 0 deletions packages/scraper/src/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,27 @@ const jobDuration = new Histogram({
labelNames: ["jobType", "hook"],
});

// Gauge set to 1 or 0 from the `browser` flag of "enterprise-puppeteer" messages.
const puppeteerActive = new Gauge({
name: "scraper_job_puppeteer_active",
help: "Puppeteer active or not",
});

// Gauge set to the `pages` value of "enterprise-puppeteer" messages.
const puppeteerPages = new Gauge({
name: "scraper_job_puppeteer_pages",
help: "Number of open Puppeteer Pages",
});

// Copy the max, queued and active figures of every job queue message into the
// corresponding metrics.
subscribe("enterprise-jobqueue", (message) => {
  const { max, queued, active } = message;

  jobMaxCount.set(max);
  jobQueueCount.set(queued);
  jobActiveCount.set(active);
});

// Mirror every puppeteer state message into the two puppeteer gauges:
// the boolean browser flag becomes 1/0, the page count is passed through unchanged.
subscribe("enterprise-puppeteer", (message) => {
  const { browser, pages } = message;
  const activeFlag = browser ? 1 : 0;

  puppeteerActive.set(activeFlag);
  puppeteerPages.set(pages);
});

subscribe("enterprise-jobs", (message) => {
if (message.type === "finished") {
// TODO: currently silently ignore multiple used hooks, only a single one should be used anyway
Expand Down
4 changes: 2 additions & 2 deletions packages/scraper/src/websocket.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import Websocket from "ws";
import logger from "enterprise-core/dist/logger";
import { remove } from "enterprise-core/dist/tools";
import { ChannelNames, subscribe, unsubscribe } from "diagnostics_channel";
import { ScraperChannel, WSRequest } from "./externals/types";
import { WSRequest } from "./externals/types";
import { DefaultJobScraper } from "./scheduler/jobScheduler";
import { publishQueues } from "./externals/queueRequest";
import { registerOnExitHandler } from "enterprise-core/dist/exit";
Expand Down Expand Up @@ -64,7 +64,7 @@ ws.on("connection", (socket) => {

class SocketChannelListener {
private readonly listenerSockets: Websocket[] = [];
private readonly channel: ScraperChannel;
private readonly channel: ChannelNames;

public constructor(channel: ChannelNames) {
this.channel = channel;
Expand Down

0 comments on commit eaceec2

Please sign in to comment.