diff --git a/end2end/server/app.js b/end2end/server/app.js index c81fb6e1..fb6fc489 100644 --- a/end2end/server/app.js +++ b/end2end/server/app.js @@ -6,8 +6,8 @@ const listEvents = require("./src/handlers/listEvents"); const createApp = require("./src/handlers/createApp"); const checkToken = require("./src/middleware/checkToken"); const updateConfig = require("./src/handlers/updateConfig"); -const ipLists = require("./src/handlers/ipLists"); -const updateIPLists = require("./src/handlers/updateIPLists"); +const lists = require("./src/handlers/lists"); +const updateLists = require("./src/handlers/updateLists"); const app = express(); @@ -21,8 +21,8 @@ app.post("/api/runtime/config", checkToken, updateConfig); app.get("/api/runtime/events", checkToken, listEvents); app.post("/api/runtime/events", checkToken, captureEvent); -app.get("/api/runtime/firewall/lists", checkToken, ipLists); -app.post("/api/runtime/firewall/lists", checkToken, updateIPLists); +app.get("/api/runtime/firewall/lists", checkToken, lists); +app.post("/api/runtime/firewall/lists", checkToken, updateLists); app.post("/api/runtime/apps", createApp); diff --git a/end2end/server/src/handlers/ipLists.js b/end2end/server/src/handlers/lists.js similarity index 64% rename from end2end/server/src/handlers/ipLists.js rename to end2end/server/src/handlers/lists.js index 048bc1b5..9334a1f2 100644 --- a/end2end/server/src/handlers/ipLists.js +++ b/end2end/server/src/handlers/lists.js @@ -1,11 +1,15 @@ -const { getBlockedIPAddresses } = require("../zen/config"); +const { + getBlockedIPAddresses, + getBlockedUserAgents, +} = require("../zen/config"); -module.exports = function ipLists(req, res) { +module.exports = function lists(req, res) { if (!req.app) { throw new Error("App is missing"); } const blockedIps = getBlockedIPAddresses(req.app); + const blockedUserAgents = getBlockedUserAgents(req.app); res.json({ success: true, @@ -20,5 +24,6 @@ module.exports = function ipLists(req, res) { }, ] : [], + 
/**
 * Stores the blocked user-agent pattern for the given app.
 *
 * An existing entry for the same `serviceId` is overwritten; otherwise a new
 * entry is appended to the module-level `blockedUserAgents` list.
 *
 * @param {object} app - App whose `serviceId` identifies the entry.
 * @param {string} uas - Pattern string, e.g. "hacker|attacker|GPTBot".
 */
function updateBlockedUserAgents(app, uas) {
  // Match on the caller's service. Comparing an entry's serviceId with itself
  // is always true and would make every app share the first stored entry.
  const entry = blockedUserAgents.find((e) => e.serviceId === app.serviceId);

  if (entry) {
    entry.userAgents = uas;
  } else {
    blockedUserAgents.push({ serviceId: app.serviceId, userAgents: uas });
  }

  // Bump lastUpdatedAt
  updateAppConfig(app, {});
}

/**
 * Looks up the blocked user-agent pattern stored for the given app.
 *
 * @param {object} app - App whose `serviceId` identifies the entry.
 * @returns {string} The stored pattern, or "" when nothing is stored for this app.
 */
function getBlockedUserAgents(app) {
  const entry = blockedUserAgents.find((e) => e.serviceId === app.serviceId);

  return entry ? entry.userAgents : "";
}
+ ); + + // Block GPT bot + const resp2 = await fetch("http://127.0.0.1:4003/", { + headers: { + "User-Agent": + "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot", + }, + signal: AbortSignal.timeout(5000), + }); + t.same(resp2.status, 403); + t.same( + await resp2.text(), + "You are not allowed to access this resource because you have been identified as a bot." + ); + + // Does not block allowed user agents + const allowedUserAgents = [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15", + "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0", + "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.200 Mobile Safari/537.36", + ]; + for (const userAgent of allowedUserAgents) { + const resp = await fetch("http://127.0.0.1:4003/", { + headers: { + "User-Agent": userAgent, + }, + signal: AbortSignal.timeout(5000), + }); + t.same(resp.status, 200); + } + }) + .catch((error) => { + t.fail(error.message); + }) + .finally(() => { + server.kill(); + }); +}); diff --git a/library/agent/Agent.test.ts b/library/agent/Agent.test.ts index 40734f8a..ba9668e1 100644 --- a/library/agent/Agent.test.ts +++ b/library/agent/Agent.test.ts @@ -31,6 +31,7 @@ wrap(fetch, "fetch", function mock() { ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], }, ], + blockedUserAgents: "AI2Bot|Bytespider", }), }; }; @@ -936,7 +937,7 @@ t.test("it sends middleware installed with heartbeat", async () => { clock.uninstall(); }); -t.test("it fetches blocked IPs", async () => { +t.test("it fetches blocked lists", async () => { const agent = createTestAgent({ token: new Token("123"), }); @@ -953,6 +954,28 @@ t.test("it fetches blocked IPs", async () => { blocked: true, 
reason: "Description", }); + + t.same( + agent + .getConfig() + .isUserAgentBlocked( + "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" + ), + { + blocked: true, + } + ); + + t.same( + agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible) Bytespider"), + { + blocked: true, + } + ); + + t.same(agent.getConfig().isUserAgentBlocked("Mozilla/5.0 (compatible)"), { + blocked: false, + }); }); t.test("it does not fetch blocked IPs if serverless", async () => { @@ -968,4 +991,15 @@ t.test("it does not fetch blocked IPs if serverless", async () => { t.same(agent.getConfig().isIPAddressBlocked("1.3.2.4"), { blocked: false, }); + + t.same( + agent + .getConfig() + .isUserAgentBlocked( + "Mozilla/5.0 (compatible) AI2Bot (+https://www.allenai.org/crawler)" + ), + { + blocked: false, + } + ); }); diff --git a/library/agent/Agent.ts b/library/agent/Agent.ts index 5182ec1e..f9739eea 100644 --- a/library/agent/Agent.ts +++ b/library/agent/Agent.ts @@ -7,7 +7,7 @@ import { ip } from "../helpers/ipAddress"; import { filterEmptyRequestHeaders } from "../helpers/filterEmptyRequestHeaders"; import { limitLengthMetadata } from "../helpers/limitLengthMetadata"; import { RateLimiter } from "../ratelimiting/RateLimiter"; -import { fetchBlockedIPAddresses } from "./api/fetchBlockedIPAddresses"; +import { fetchBlockedLists } from "./api/fetchBlockedLists"; import { ReportingAPI, ReportingAPIResponse } from "./api/ReportingAPI"; import { AgentInfo } from "./api/Event"; import { Token } from "./api/Token"; @@ -114,7 +114,7 @@ export class Agent { this.updateServiceConfig(result); - await this.updateBlockedIPAddresses(); + await this.updateBlockedLists(); } } @@ -341,7 +341,7 @@ export class Agent { this.interval.unref(); } - private async updateBlockedIPAddresses() { + private async updateBlockedLists() { if (!this.token) { return; } @@ -352,12 +352,13 @@ export class Agent { } try { - const blockedIps = await fetchBlockedIPAddresses(this.token); - 
// Verifies that a user-agent pattern blocks matching agents and that an empty
// pattern turns blocking off again.
t.test("it blocks bots", async () => {
  const serviceConfig = new ServiceConfig([], 0, [], [], true, []);
  serviceConfig.updateBlockedUserAgents("googlebot|bingbot");

  // Expected verdicts while the pattern is active, checked in order.
  const expectations = [
    { userAgent: "googlebot", blocked: true },
    { userAgent: "123 bingbot abc", blocked: true },
    { userAgent: "bing", blocked: false },
  ];
  for (const { userAgent, blocked } of expectations) {
    t.same(serviceConfig.isUserAgentBlocked(userAgent), { blocked });
  }

  // Clearing the pattern must unblock a previously blocked agent.
  serviceConfig.updateBlockedUserAgents("");

  t.same(serviceConfig.isUserAgentBlocked("googlebot"), { blocked: false });
});
/**
 * Replaces the blocked user-agent pattern.
 *
 * A non-empty string is compiled into a case-insensitive regular expression;
 * an empty string clears the pattern so no user agent is blocked.
 * `new RegExp` throws on an invalid pattern, in which case the previously
 * stored expression is left untouched.
 */
updateBlockedUserAgents(blockedUserAgents: string) {
  this.blockedUserAgentRegex = blockedUserAgents
    ? new RegExp(blockedUserAgents, "i")
    : undefined;
}

/**
 * Reports whether the given user agent matches the stored pattern.
 * Always returns `{ blocked: false }` when no pattern is stored.
 */
isUserAgentBlocked(ua: string): { blocked: boolean } {
  const pattern = this.blockedUserAgentRegex;

  if (!pattern) {
    return { blocked: false };
  }

  return { blocked: pattern.test(ua) };
}
fetchBlockedIPAddresses( }); if (statusCode !== 200) { - throw new Error(`Failed to fetch blocked IP addresses: ${statusCode}`); + throw new Error(`Failed to fetch blocked lists: ${statusCode}`); } const result: { blockedIPAddresses: Blocklist[]; + blockedUserAgents: string; } = JSON.parse(body); - return result && result.blockedIPAddresses ? result.blockedIPAddresses : []; + return { + blockedIPAddresses: + result && Array.isArray(result.blockedIPAddresses) + ? result.blockedIPAddresses + : [], + // Blocked user agents are stored as a string pattern for usage in a regex (e.g. "Googlebot|Bingbot") + blockedUserAgents: + result && typeof result.blockedUserAgents === "string" + ? result.blockedUserAgents + : "", + }; } diff --git a/library/sources/Hono.test.ts b/library/sources/Hono.test.ts index 06f6ae98..91440112 100644 --- a/library/sources/Hono.test.ts +++ b/library/sources/Hono.test.ts @@ -30,6 +30,7 @@ wrap(fetch, "fetch", function mock(original) { ips: ["1.3.2.0/24", "fe80::1234:5678:abcd:ef12/64"], }, ], + blockedUserAgents: "hacker|attacker", }), }; } @@ -308,7 +309,7 @@ t.test("works using @hono/node-server (real socket ip)", opts, async (t) => { server.close(); }); -t.test("ip blocking works (real socket)", opts, async (t) => { +t.test("ip and bot blocking works (real socket)", opts, async (t) => { // Start a server with a real socket // The blocking is implemented in the HTTPServer source const { serve } = @@ -353,6 +354,19 @@ t.test("ip blocking works (real socket)", opts, async (t) => { }); t.equal(response3.statusCode, 200); + // Test blocked user agent + const response4 = await fetch.fetch({ + url: new URL("http://127.0.0.1:8766/"), + headers: { + "User-Agent": "hacker", + }, + }); + t.equal(response4.statusCode, 403); + t.equal( + response4.body, + "You are not allowed to access this resource because you have been identified as a bot." 
+ ); + // Cleanup server server.close(); }); diff --git a/library/sources/http-server/checkIfIPAddressIsBlocked.ts b/library/sources/http-server/checkIfRequestIsBlocked.ts similarity index 74% rename from library/sources/http-server/checkIfIPAddressIsBlocked.ts rename to library/sources/http-server/checkIfRequestIsBlocked.ts index 9b79e1da..00940dc9 100644 --- a/library/sources/http-server/checkIfIPAddressIsBlocked.ts +++ b/library/sources/http-server/checkIfRequestIsBlocked.ts @@ -9,7 +9,7 @@ import { ipAllowedToAccessRoute } from "./ipAllowedToAccessRoute"; * - Whether the IP address is blocked by an IP blocklist (e.g. Geo restrictions) * - Whether the IP address is allowed to access the current route (e.g. Admin panel) */ -export function checkIfIPAddressIsBlocked( +export function checkIfRequestIsBlocked( res: ServerResponse, agent: Agent ): boolean { @@ -51,5 +51,21 @@ export function checkIfIPAddressIsBlocked( return true; } + const isUserAgentBlocked = + context.headers && typeof context.headers["user-agent"] === "string" + ? agent.getConfig().isUserAgentBlocked(context.headers["user-agent"]) + : ({ blocked: false } as const); + + if (isUserAgentBlocked.blocked) { + res.statusCode = 403; + res.setHeader("Content-Type", "text/plain"); + + res.end( + "You are not allowed to access this resource because you have been identified as a bot." 
+ ); + + return true; + } + return false; } diff --git a/library/sources/http-server/createRequestListener.ts b/library/sources/http-server/createRequestListener.ts index ecea3c29..5eda6abb 100644 --- a/library/sources/http-server/createRequestListener.ts +++ b/library/sources/http-server/createRequestListener.ts @@ -2,7 +2,7 @@ import type { IncomingMessage, RequestListener, ServerResponse } from "http"; import { Agent } from "../../agent/Agent"; import { bindContext, getContext, runWithContext } from "../../agent/Context"; import { isPackageInstalled } from "../../helpers/isPackageInstalled"; -import { checkIfIPAddressIsBlocked } from "./checkIfIPAddressIsBlocked"; +import { checkIfRequestIsBlocked } from "./checkIfRequestIsBlocked"; import { contextFromRequest } from "./contextFromRequest"; import { readBodyStream } from "./readBodyStream"; import { shouldDiscoverRoute } from "./shouldDiscoverRoute"; @@ -59,7 +59,7 @@ function callListenerWithContext( // If using http2, the context is not available in the callback without this res.on("finish", bindContext(createOnFinishRequestHandler(res, agent))); - if (checkIfIPAddressIsBlocked(res, agent)) { + if (checkIfRequestIsBlocked(res, agent)) { // The return is necessary to prevent the listener from being called return; }