From e2ae3e3267e054fb779a49f5f73cadada1fc9479 Mon Sep 17 00:00:00 2001 From: Sven Efftinge Date: Tue, 12 Sep 2023 10:33:51 +0200 Subject: [PATCH] [server] improved logging (#18697) --- components/server/src/jobs/runner.ts | 49 ++++++++++--------- .../server/src/messaging/redis-subscriber.ts | 27 +++++----- components/server/src/server.ts | 11 +++++ components/server/src/util/log-context.ts | 19 +++++-- .../websocket/websocket-connection-manager.ts | 28 +++++++---- .../server/src/workspace/workspace-starter.ts | 28 +++++------ 6 files changed, 97 insertions(+), 65 deletions(-) diff --git a/components/server/src/jobs/runner.ts b/components/server/src/jobs/runner.ts index dfcc27ed5afedd..b601da6db7141c 100644 --- a/components/server/src/jobs/runner.ts +++ b/components/server/src/jobs/runner.ts @@ -18,6 +18,7 @@ import { WebhookEventGarbageCollector } from "./webhook-gc"; import { WorkspaceGarbageCollector } from "./workspace-gc"; import { SnapshotsJob } from "./snapshots"; import { RelationshipUpdateJob } from "../authorization/relationship-updater-job"; +import { runWithContext } from "../util/log-context"; export const Job = Symbol("Job"); @@ -78,29 +79,31 @@ export class JobRunner { try { await this.mutex.using([job.name, ...(job.lockedResources || [])], job.frequencyMs, async (signal) => { - log.info(`Acquired lock for job ${job.name}.`, logCtx); - // we want to hold the lock for the entire duration of the job, so we return earliest after frequencyMs - const timeout = new Promise((resolve) => setTimeout(resolve, job.frequencyMs)); - const timer = jobsDurationSeconds.startTimer({ name: job.name }); - reportJobStarted(job.name); - const now = new Date().getTime(); - try { - await job.run(); - log.info(`Successfully finished job ${job.name}`, { - ...logCtx, - jobTookSec: `${(new Date().getTime() - now) / 1000}s`, - }); - reportJobCompleted(job.name, true); - } catch (err) { - log.error(`Error while running job ${job.name}`, err, { - ...logCtx, - jobTookSec: `${(new Date().getTime() - now) / 1000}s`, - }); - reportJobCompleted(job.name, false); - } finally { - jobsDurationSeconds.observe(timer()); - await timeout; - } + await runWithContext(job.name, {}, async () => { + log.info(`Acquired lock for job ${job.name}.`, logCtx); + // we want to hold the lock for the entire duration of the job, so we return earliest after frequencyMs + const timeout = new Promise((resolve) => setTimeout(resolve, job.frequencyMs)); + const timer = jobsDurationSeconds.startTimer({ name: job.name }); + reportJobStarted(job.name); + const now = new Date().getTime(); + try { + await job.run(); + log.info(`Successfully finished job ${job.name}`, { + ...logCtx, + jobTookSec: `${(new Date().getTime() - now) / 1000}s`, + }); + reportJobCompleted(job.name, true); + } catch (err) { + log.error(`Error while running job ${job.name}`, err, { + ...logCtx, + jobTookSec: `${(new Date().getTime() - now) / 1000}s`, + }); + reportJobCompleted(job.name, false); + } finally { + jobsDurationSeconds.observe(timer()); + await timeout; + } + }); }); } catch (err) { if (err instanceof ResourceLockedError || err instanceof ExecutionError) { diff --git a/components/server/src/messaging/redis-subscriber.ts b/components/server/src/messaging/redis-subscriber.ts index 5f56104180338d..3becb5fbaa4a47 100644 --- a/components/server/src/messaging/redis-subscriber.ts +++ b/components/server/src/messaging/redis-subscriber.ts @@ -29,6 +29,7 @@ import { } from "../prometheus-metrics"; import { Redis } from "ioredis"; import { WorkspaceDB } from "@gitpod/gitpod-db/lib"; +import { runWithContext } from "../util/log-context"; const UNDEFINED_KEY = "undefined"; @@ -54,18 +55,20 @@ export class RedisSubscriber { } this.redis.on("message", async (channel: string, message: string) => { - reportRedisUpdateReceived(channel); - - let err: Error | undefined; - try { - await this.onMessage(channel, message); - log.debug("[redis] Succesfully handled update", { channel, message }); - } catch (e) { - err = e; - log.error("[redis] Failed to handle message from Pub/Sub", e, { channel, message }); - } finally { - reportRedisUpdateCompleted(channel, err); - } + await runWithContext("redis-subscriber", {}, async () => { + reportRedisUpdateReceived(channel); + + let err: Error | undefined; + try { + await this.onMessage(channel, message); + log.debug("[redis] Succesfully handled update", { channel, message }); + } catch (e) { + err = e; + log.error("[redis] Failed to handle message from Pub/Sub", e, { channel, message }); + } finally { + reportRedisUpdateCompleted(channel, err); + } + }); }); } diff --git a/components/server/src/server.ts b/components/server/src/server.ts index 8449844fcfe5df..7bfeb58c5b9548 100644 --- a/components/server/src/server.ts +++ b/components/server/src/server.ts @@ -47,6 +47,7 @@ import { GitHubEnterpriseApp } from "./prebuilds/github-enterprise-app"; import { JobRunner } from "./jobs/runner"; import { RedisSubscriber } from "./messaging/redis-subscriber"; import { HEADLESS_LOGS_PATH_PREFIX, HEADLESS_LOG_DOWNLOAD_PATH_PREFIX } from "./workspace/headless-log-service"; +import { runWithContext } from "./util/log-context"; @injectable() export class Server { @@ -138,6 +139,16 @@ export class Server { // Install passport await this.authenticator.init(app); + // log context + app.use(async (req: express.Request, res: express.Response, next: express.NextFunction) => { + try { + const userId = req.user ? req.user.id : undefined; + await runWithContext("http", { userId }, () => next()); + } catch (err) { + next(err); + } + }); + // Ensure that host contexts of dynamic auth providers are initialized. await this.hostCtxProvider.init(); diff --git a/components/server/src/util/log-context.ts b/components/server/src/util/log-context.ts index 8d76ea93071c21..400efcf1f84dea 100644 --- a/components/server/src/util/log-context.ts +++ b/components/server/src/util/log-context.ts @@ -13,24 +13,33 @@ import { v4 } from "uuid"; type EnhancedLogContext = LogContext & { contextId?: string; contextTimeMs: number; + contextKind: string; }; + const asyncLocalStorage = new AsyncLocalStorage(); const augmenter: LogContext.Augmenter = (ctx) => { const globalContext = asyncLocalStorage.getStore(); - const contextTime = globalContext?.contextTimeMs ? Date.now() - globalContext.contextTimeMs : undefined; - return { + const contextTimeMs = globalContext?.contextTimeMs ? Date.now() - globalContext.contextTimeMs : undefined; + const result = { ...globalContext, - contextTime, + contextTimeMs, ...ctx, }; + // if its an empty object return undefined + return Object.keys(result).length === 0 ? undefined : result; }; LogContext.setAugmenter(augmenter); -export async function runWithContext(context: LogContext, fun: () => T): Promise { +export async function runWithContext( + contextKind: string, + context: LogContext & { contextId?: string }, + fun: () => T, +): Promise { return asyncLocalStorage.run( { ...context, - contextId: v4(), + contextKind, + contextId: context.contextId || v4(), contextTimeMs: Date.now(), }, fun, diff --git a/components/server/src/websocket/websocket-connection-manager.ts b/components/server/src/websocket/websocket-connection-manager.ts index 6eeea6e96c41ee..946fee7e328a7b 100644 --- a/components/server/src/websocket/websocket-connection-manager.ts +++ b/components/server/src/websocket/websocket-connection-manager.ts @@ -374,21 +374,33 @@ class GitpodJsonRpcProxyFactory extends JsonRpcProxyFactory } protected async onRequest(method: string, ...args: any[]): Promise { + const span = TraceContext.startSpan(method, undefined); const userId = this.clientMetadata.userId; + const requestId = span.context().toTraceId(); return runWithContext( + "request", { userId, + contextId: requestId, }, () => { - return this.internalOnRequest(method, ...args); + try { + return this.internalOnRequest(span, requestId, method, ...args); + } finally { + span.finish(); + } }, ); } - private async internalOnRequest(method: string, ...args: any[]): Promise { - const span = TraceContext.startSpan(method, undefined); - const ctx = { span }; + private async internalOnRequest( + span: opentracing.Span, + requestId: string, + method: string, + ...args: any[] + ): Promise { const userId = this.clientMetadata.userId; + const ctx = { span }; const timer = apiCallDurationHistogram.startTimer(); try { // generic tracing data @@ -432,7 +444,7 @@ class GitpodJsonRpcProxyFactory extends JsonRpcProxyFactory observeAPICallsDuration(method, 200, timer()); return result; } catch (e) { - const traceID = span.context().toTraceId(); + const requestIdMessage = ` If this error is unexpected, please quote the request ID '${requestId}' when reaching out to Gitpod Support.`; if (ApplicationError.hasErrorCode(e)) { increaseApiCallCounter(method, e.code); observeAPICallsDuration(method, e.code, timer()); @@ -449,13 +461,13 @@ class GitpodJsonRpcProxyFactory extends JsonRpcProxyFactory message: e.message, }, ); - throw new ResponseError(e.code, e.message, e.data); + throw new ResponseError(e.code, e.message + requestIdMessage, e.data); } else { TraceContext.setError(ctx, e); // this is a "real" error const err = new ApplicationError( ErrorCodes.INTERNAL_SERVER_ERROR, - `Internal server error. Please quote trace ID: '${traceID}' when reaching to Gitpod Support`, + `Internal server error: '${e.message}'` + requestIdMessage, ); increaseApiCallCounter(method, err.code); observeAPICallsDuration(method, err.code, timer()); @@ -464,8 +476,6 @@ class GitpodJsonRpcProxyFactory extends JsonRpcProxyFactory log.error({ userId }, `Request ${method} failed with internal server error`, e, { method, args }); throw new ResponseError(ErrorCodes.INTERNAL_SERVER_ERROR, e.message); } - } finally { - span.finish(); } } diff --git a/components/server/src/workspace/workspace-starter.ts b/components/server/src/workspace/workspace-starter.ts index c9f746497be0a8..5541a93f65ba3a 100644 --- a/components/server/src/workspace/workspace-starter.ts +++ b/components/server/src/workspace/workspace-starter.ts @@ -484,9 +484,13 @@ export class WorkspaceStarter { const span = TraceContext.startSpan("actuallyStartWorkspace", ctx); const region = instance.configuration.regionPreference; span.setTag("region_preference", region); - log.info("Attempting to start workspace", { - instanceID: instance.id, - userID: user.id, + const logCtx: LogContext = { + instanceId: instance.id, + userId: user.id, + organizationId: workspace.organizationId, + workspaceId: workspace.id, + }; + log.info(logCtx, "Attempting to start workspace", { forceRebuild: forceRebuild, }); @@ -524,18 +528,10 @@ export class WorkspaceStarter { const ideUrlPromise = new Deferred(); const before = Date.now(); const logSuccess = (fromWsManager: boolean) => { - log.info( - { - instanceId: instance.id, - userId: workspace.ownerId, - workspaceId: workspace.id, - }, - "Received ideURL", - { - tookMs: Date.now() - before, - fromWsManager, - }, - ); + log.info(logCtx, "Received ideURL", { + tookMs: Date.now() - before, + fromWsManager, + }); }; const doStartWorkspace = async () => { @@ -623,7 +619,7 @@ export class WorkspaceStarter { intervalHandle.dispose(); } } catch (err) { - this.logAndTraceStartWorkspaceError({ span }, { userId: user.id, instanceId: instance.id }, err); + this.logAndTraceStartWorkspaceError({ span }, logCtx, err); return { instanceID: instance.id }; } finally {