From 12124914694b18568a8e78ac735a4819f24e54fd Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Mon, 7 Oct 2024 17:28:18 -0700 Subject: [PATCH] fix(collector): move to an otel collector --- .github/workflows/docker-images.yml | 29 ---- config/router.yaml | 2 + images/otel/Dockerfile | 17 --- images/otel/entrypoint.sh | 4 - images/otel/otel-collector-config.yaml | 63 -------- infrastructure/client-api/src/config/index.ts | 4 +- infrastructure/client-api/src/main.ts | 38 +---- infrastructure/list-api/src/config/index.ts | 5 + infrastructure/list-api/src/main.ts | 38 +---- .../src/config/index.ts | 5 + .../parser-graphql-wrapper/src/main.ts | 38 +---- infrastructure/user-api/src/config/index.ts | 4 +- infrastructure/user-api/src/main.ts | 38 +---- infrastructure/user-list-search/apollo_ecs.tf | 6 +- infrastructure/user-list-search/ecs.tf | 55 ------- infrastructure/user-list-search/locals.tf | 2 + .../user-list-search/queue_users_ecs.tf | 2 +- .../v3-proxy-api/src/config/index.ts | 5 + infrastructure/v3-proxy-api/src/main.ts | 38 +---- .../sentry/src/featureFlagTraceSampler.ts | 4 + packages/tracing/package.json | 5 + packages/tracing/src/tracing.ts | 92 +++++++----- pnpm-lock.yaml | 19 ++- servers/client-api/config/router.yaml | 66 ++++----- servers/client-api/config/supergraph.yaml | 138 +++++++++--------- servers/list-api/src/config/index.ts | 2 +- servers/otel-collector/httpd.conf | 1 - .../src/config/index.ts | 2 +- servers/user-api/src/config/index.ts | 2 +- servers/user-list-search/src/config/index.ts | 2 +- servers/v3-proxy-api/src/config/index.ts | 2 +- 31 files changed, 241 insertions(+), 487 deletions(-) delete mode 100644 .github/workflows/docker-images.yml create mode 100644 config/router.yaml delete mode 100644 images/otel/Dockerfile delete mode 100644 images/otel/entrypoint.sh delete mode 100644 images/otel/otel-collector-config.yaml diff --git a/.github/workflows/docker-images.yml b/.github/workflows/docker-images.yml deleted file mode 100644 index 
665e27aff..000000000 --- a/.github/workflows/docker-images.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: Docker Images -on: - pull_request: - push: - branches: - - main - schedule: # Rebuild images nightly - - cron: '0 0 * * *' - -jobs: - otel: - runs-on: ubuntu-latest - steps: - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build Base - uses: docker/build-push-action@v6 - with: - push: ${{ github.ref == 'refs/heads/main' && true || false}} - platforms: linux/amd64,linux/arm64 - tags: pocket/opentelemetry-collector-contrib - context: "{{defaultContext}}:images/otel" \ No newline at end of file diff --git a/config/router.yaml b/config/router.yaml new file mode 100644 index 000000000..206aecb8f --- /dev/null +++ b/config/router.yaml @@ -0,0 +1,2 @@ +health_check: + enabled: false diff --git a/images/otel/Dockerfile b/images/otel/Dockerfile deleted file mode 100644 index 8723ac064..000000000 --- a/images/otel/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -# Use an alpine image as our base image, this allows us to define our own entry point script and have access to sh or bash, which the otel image does not have -FROM alpine:3 - -# Copy the otelcol-contrib binary from the official image -COPY --from=otel/opentelemetry-collector-contrib:0.111.0 /otelcol-contrib /otelcol-contrib - -COPY otel-collector-config.yaml /etc/otelcol-contrib/config.yaml -COPY entrypoint.sh /entrypoint.sh -RUN chmod a+x /entrypoint.sh - -#Set the ENV variable for the google credentials that will be used by the google cloud exporter and mounted via an environment variable that will be saved to this file via our entrypoint script -ENV GOOGLE_APPLICATION_CREDENTIALS=/etc/otelcol-contrib/key.json -WORKDIR / - -ENTRYPOINT [ "./entrypoint.sh" ] - -CMD [ 
"/otelcol-contrib", "--config", "/etc/otelcol-contrib/config.yaml" ] \ No newline at end of file diff --git a/images/otel/entrypoint.sh b/images/otel/entrypoint.sh deleted file mode 100644 index 6c6fa0718..000000000 --- a/images/otel/entrypoint.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -set -e -echo "$GOOGLE_APPLICATION_CREDENTIALS_JSON" > /etc/otelcol-contrib/key.json -exec "$@" \ No newline at end of file diff --git a/images/otel/otel-collector-config.yaml b/images/otel/otel-collector-config.yaml deleted file mode 100644 index 2cdddcfd2..000000000 --- a/images/otel/otel-collector-config.yaml +++ /dev/null @@ -1,63 +0,0 @@ -# Note this is only built nightly and will take effect on the next deploy after being built. -receivers: - otlp: - protocols: - grpc: - endpoint: "0.0.0.0:4317" # gRPC port for receiving traces, metrics, and logs over gRPC - http: - endpoint: "0.0.0.0:4318" # http port for receiving traces, metrics, and logs over http -exporters: - googlecloud: - log: - default_log_name: opentelemetry.io/collector-exported-log - # debug: - # verbosity: detailed -processors: - memory_limiter: - check_interval: 1s - limit_percentage: 30 - spike_limit_percentage: 10 - batch: - # batch metrics before sending to reduce API usage - send_batch_max_size: 200 - send_batch_size: 200 - timeout: 5s - resourcedetection/ecs: - detectors: [env, ecs] - timeout: 2s - override: false - resource/cleanup_for_google: - attributes: - - key: service.namespace - value: pocket - action: insert - # We must set the region to us-east1, as the google exporter uses this to know where to send data to. 
- - key: cloud.region - value: us-east1 - action: upsert - - key: cloud.availability_zone - value: us-east1-b - action: upsert - # set this to the ecs task id - - key: host.id - from_attribute: aws.ecs.task.id - action: upsert - # https://opentelemetry.io/docs/specs/semconv/resource/deployment-environment/ - - key: deployment.environment.name - value: ${env:DEPLOYMENT_ENVIRONMENT_NAME} - action: upsert - -service: - pipelines: - traces: - receivers: [otlp] - processors: [resourcedetection/ecs, resource/cleanup_for_google, memory_limiter, batch] - exporters: [googlecloud] - # metrics: - # receivers: [otlp] - # processors: [resourcedetection/ecs, resource/cleanup_for_google, memory_limiter, batch] - # exporters: [googlecloud] - logs: - receivers: [otlp] - processors: [resourcedetection/ecs, resource/cleanup_for_google, memory_limiter, batch] - exporters: [googlecloud] \ No newline at end of file diff --git a/infrastructure/client-api/src/config/index.ts b/infrastructure/client-api/src/config/index.ts index 00ecb8fbd..681413259 100644 --- a/infrastructure/client-api/src/config/index.ts +++ b/infrastructure/client-api/src/config/index.ts @@ -47,6 +47,8 @@ export const config = { }, }, tracing: { - host: 'localhost', + url: isDev + ? 'https://otel-collector.getpocket.dev:443' + : 'https://otel-collector.readitlater.com:443', }, }; diff --git a/infrastructure/client-api/src/main.ts b/infrastructure/client-api/src/main.ts index 0aab79814..2f7f737a8 100644 --- a/infrastructure/client-api/src/main.ts +++ b/infrastructure/client-api/src/main.ts @@ -235,8 +235,8 @@ class ClientAPI extends TerraformStack { value: config.isProd ? 
'production' : 'development', }, { - name: 'OTLP_COLLECTOR_HOST', - value: `${config.tracing.host}`, + name: 'OTLP_COLLECTOR_URL', + value: `${config.tracing.url}`, }, { name: 'REDIS_ENDPOINT', @@ -266,40 +266,6 @@ class ClientAPI extends TerraformStack { startPeriod: 0, }, }, - { - name: 'otel-collector', - containerImage: 'pocket/opentelemetry-collector-contrib', - essential: true, - logMultilinePattern: '^\\S.+', - logGroup: this.createCustomLogGroup('otel-collector'), - portMappings: [ - { - hostPort: 4138, - containerPort: 4138, - }, - { - hostPort: 4137, - containerPort: 4137, - }, - { - hostPort: 55681, - containerPort: 55681, - }, - ], - envVars: [ - { - name: 'DEPLOYMENT_ENVIRONMENT_NAME', - value: config.tags.env_code, - }, - ], - secretEnvVars: [ - { - name: 'GOOGLE_APPLICATION_CREDENTIALS_JSON', - valueFrom: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/GCP_SA_TRACES:::`, - }, - ], - repositoryCredentialsParam: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/DockerHub`, - }, ], codeDeploy: { useCodeDeploy: true, diff --git a/infrastructure/list-api/src/config/index.ts b/infrastructure/list-api/src/config/index.ts index 773fd1e9b..3834ef425 100644 --- a/infrastructure/list-api/src/config/index.ts +++ b/infrastructure/list-api/src/config/index.ts @@ -50,4 +50,9 @@ export const config = { userEvents: `PocketEventBridge-${environment}-UserEventTopic`, }, }, + tracing: { + url: isDev + ? 
'https://otel-collector.getpocket.dev:443' + : 'https://otel-collector.readitlater.com:443', + }, }; diff --git a/infrastructure/list-api/src/main.ts b/infrastructure/list-api/src/main.ts index 052cb348d..b93d8cb5a 100644 --- a/infrastructure/list-api/src/main.ts +++ b/infrastructure/list-api/src/main.ts @@ -230,6 +230,10 @@ class ListAPI extends TerraformStack { name: 'EVENT_BUS_NAME', value: config.envVars.eventBusName, }, + { + name: 'OTLP_COLLECTOR_URL', + value: config.tracing.url, + }, ], secretEnvVars: [ { @@ -304,40 +308,6 @@ class ListAPI extends TerraformStack { logGroup: this.createCustomLogGroup('app'), logMultilinePattern: '^\\S.+', }, - { - name: 'otel-collector', - containerImage: 'pocket/opentelemetry-collector-contrib', - essential: true, - logMultilinePattern: '^\\S.+', - logGroup: this.createCustomLogGroup('otel-collector'), - portMappings: [ - { - hostPort: 4138, - containerPort: 4138, - }, - { - hostPort: 4137, - containerPort: 4137, - }, - { - hostPort: 55681, - containerPort: 55681, - }, - ], - envVars: [ - { - name: 'DEPLOYMENT_ENVIRONMENT_NAME', - value: config.tags.env_code, - }, - ], - secretEnvVars: [ - { - name: 'GOOGLE_APPLICATION_CREDENTIALS_JSON', - valueFrom: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/GCP_SA_TRACES:::`, - }, - ], - repositoryCredentialsParam: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/DockerHub`, - }, ], codeDeploy: { useCodeDeploy: true, diff --git a/infrastructure/parser-graphql-wrapper/src/config/index.ts b/infrastructure/parser-graphql-wrapper/src/config/index.ts index 5c5243439..ae4613bd8 100644 --- a/infrastructure/parser-graphql-wrapper/src/config/index.ts +++ b/infrastructure/parser-graphql-wrapper/src/config/index.ts @@ -42,4 +42,9 @@ export const config = { databaseName: 'readitla_shares', masterUsername: 'share_urls', }, + tracing: { + url: isDev + ? 
'https://otel-collector.getpocket.dev:443' + : 'https://otel-collector.readitlater.com:443', + }, }; diff --git a/infrastructure/parser-graphql-wrapper/src/main.ts b/infrastructure/parser-graphql-wrapper/src/main.ts index 247b798e5..a6e6d6960 100644 --- a/infrastructure/parser-graphql-wrapper/src/main.ts +++ b/infrastructure/parser-graphql-wrapper/src/main.ts @@ -176,6 +176,10 @@ class ParserGraphQLWrapper extends TerraformStack { name: 'ITEM_SUMMARY_TABLE', value: dynamodb.itemSummaryTable.dynamodb.name, }, + { + name: 'OTLP_COLLECTOR_URL', + value: config.tracing.url, + }, ], healthCheck: { command: [ @@ -278,40 +282,6 @@ class ParserGraphQLWrapper extends TerraformStack { }, ], }, - { - name: 'otel-collector', - containerImage: 'pocket/opentelemetry-collector-contrib', - essential: true, - logMultilinePattern: '^\\S.+', - logGroup: this.createCustomLogGroup('otel-collector'), - portMappings: [ - { - hostPort: 4138, - containerPort: 4138, - }, - { - hostPort: 4137, - containerPort: 4137, - }, - { - hostPort: 55681, - containerPort: 55681, - }, - ], - envVars: [ - { - name: 'DEPLOYMENT_ENVIRONMENT_NAME', - value: config.tags.env_code, - }, - ], - secretEnvVars: [ - { - name: 'GOOGLE_APPLICATION_CREDENTIALS_JSON', - valueFrom: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/GCP_SA_TRACES`, - }, - ], - repositoryCredentialsParam: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/DockerHub`, - }, ], codeDeploy: { useCodeDeploy: true, diff --git a/infrastructure/user-api/src/config/index.ts b/infrastructure/user-api/src/config/index.ts index 53765fc99..6d9126d99 100644 --- a/infrastructure/user-api/src/config/index.ts +++ b/infrastructure/user-api/src/config/index.ts @@ -37,6 +37,8 @@ export const config = { env_code: isDev ? 'dev' : 'prod', }, tracing: { - host: 'localhost', + url: isDev + ? 
'https://otel-collector.getpocket.dev:443' + : 'https://otel-collector.readitlater.com:443', }, }; diff --git a/infrastructure/user-api/src/main.ts b/infrastructure/user-api/src/main.ts index 3cf4c6a2d..5e99ca5d7 100644 --- a/infrastructure/user-api/src/main.ts +++ b/infrastructure/user-api/src/main.ts @@ -112,6 +112,10 @@ class UserAPI extends TerraformStack { startPeriod: 0, }, envVars: [ + { + name: 'OTLP_COLLECTOR_URL', + value: config.tracing.url, + }, { name: 'NODE_ENV', value: process.env.NODE_ENV ?? 'development', @@ -198,40 +202,6 @@ class UserAPI extends TerraformStack { logGroup: this.createCustomLogGroup('app'), logMultilinePattern: '^\\S.+', }, - { - name: 'otel-collector', - containerImage: 'pocket/opentelemetry-collector-contrib', - essential: true, - logMultilinePattern: '^\\S.+', - logGroup: this.createCustomLogGroup('otel-collector'), - portMappings: [ - { - hostPort: 4138, - containerPort: 4138, - }, - { - hostPort: 4137, - containerPort: 4137, - }, - { - hostPort: 55681, - containerPort: 55681, - }, - ], - envVars: [ - { - name: 'DEPLOYMENT_ENVIRONMENT_NAME', - value: config.tags.env_code, - }, - ], - secretEnvVars: [ - { - name: 'GOOGLE_APPLICATION_CREDENTIALS_JSON', - valueFrom: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/GCP_SA_TRACES:::`, - }, - ], - repositoryCredentialsParam: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/DockerHub`, - }, ], codeDeploy: { useCodeDeploy: true, diff --git a/infrastructure/user-list-search/apollo_ecs.tf b/infrastructure/user-list-search/apollo_ecs.tf index e520b710c..d38dc55de 100644 --- a/infrastructure/user-list-search/apollo_ecs.tf +++ b/infrastructure/user-list-search/apollo_ecs.tf @@ -110,13 +110,17 @@ module "apollo" { { name = "CORPUS_SEARCH_ENDPOINT" value = module.corpus_embeddings.opensearch_endpoint + }, + { + name = "OTLP_COLLECTOR_URL" + value = local.workspace.otlpCollectorUrl } ] } resource "aws_ecs_task_definition" "apollo" { family = 
"${local.prefix}-Apollo" - container_definitions = "[${module.apollo.json_map_encoded}, ${module.otel.json_map_encoded}]" + container_definitions = "[${module.apollo.json_map_encoded}]" task_role_arn = aws_iam_role.ecs_task_role.arn execution_role_arn = aws_iam_role.ecs_execution_role.arn diff --git a/infrastructure/user-list-search/ecs.tf b/infrastructure/user-list-search/ecs.tf index 9780df9ba..e10d1ff5e 100644 --- a/infrastructure/user-list-search/ecs.tf +++ b/infrastructure/user-list-search/ecs.tf @@ -101,58 +101,3 @@ resource "aws_security_group" "ecs_security_group" { depends_on = [aws_security_group.alb_security_group] } -resource "aws_cloudwatch_log_group" "xray" { - name = "/ecs/${local.name}/${local.env}/xray" - retention_in_days = 30 -} - -module "otel" { - source = "cloudposse/ecs-container-definition/aws" - version = "0.61.1" - - essential = true - container_name = "otel-collector" - container_image = "pocket/opentelemetry-collector-contrib" - - repository_credentials = { - credentialsParameter : local.container_credential - } - - log_configuration = { - logDriver = "awslogs" - secretOptions = [] - options = { - awslogs-region = data.aws_region.current.name - awslogs-group = aws_cloudwatch_log_group.xray.name - awslogs-stream-prefix = "ecs" - } - } - - environment = [{ - name = "DEPLOYMENT_ENVIRONMENT_NAME", - value = local.tags.env_code, - }] - - secrets = [{ - name = "GOOGLE_APPLICATION_CREDENTIALS_JSON" - valueFrom = "${local.secret_path_shared}GCP_SA_TRACES:::" - }] - - port_mappings = [ - { - containerPort = 4138 - hostPort = 4138 - }, - { - containerPort = 4137 - hostPort = 4137 - }, - { - containerPort = 55681 - hostPort = 55681 - } - ] - container_cpu = null - container_memory = null - container_memory_reservation = null -} diff --git a/infrastructure/user-list-search/locals.tf b/infrastructure/user-list-search/locals.tf index 0c30dc3bd..6c27d0d36 100644 --- a/infrastructure/user-list-search/locals.tf +++ 
b/infrastructure/user-list-search/locals.tf @@ -81,6 +81,7 @@ locals { sns_topic_corpus_events = "PocketEventBridge-Dev-CorpusEventsTopic" sns_topic_collection_events = "PocketEventBridge-Dev-CollectionEventTopic" userApiUri = "https://user-list-search.getpocket.dev" + otlpCollectorUrl = "https://otel-collector.getpocket.dev:443" } UserListSearch-Prod = { @@ -97,6 +98,7 @@ locals { sns_topic_corpus_events = "PocketEventBridge-Prod-CorpusEventsTopic" sns_topic_collection_events = "PocketEventBridge-Prod-CollectionEventTopic" userApiUri = "https://user-list-search.readitlater.com" + otlpCollectorUrl = "https://otel-collector.readitlater.com:443" } } diff --git a/infrastructure/user-list-search/queue_users_ecs.tf b/infrastructure/user-list-search/queue_users_ecs.tf index 9885ddc68..0c0198615 100644 --- a/infrastructure/user-list-search/queue_users_ecs.tf +++ b/infrastructure/user-list-search/queue_users_ecs.tf @@ -72,7 +72,7 @@ module "queue_users" { resource "aws_ecs_task_definition" "queue_users" { family = "${local.prefix}-QueueUsers" - container_definitions = "[${module.queue_users.json_map_encoded}, ${module.otel.json_map_encoded}]" + container_definitions = "[${module.queue_users.json_map_encoded}]" task_role_arn = aws_iam_role.ecs_task_role.arn execution_role_arn = aws_iam_role.ecs_execution_role.arn diff --git a/infrastructure/v3-proxy-api/src/config/index.ts b/infrastructure/v3-proxy-api/src/config/index.ts index fae3a2019..b3a4382af 100644 --- a/infrastructure/v3-proxy-api/src/config/index.ts +++ b/infrastructure/v3-proxy-api/src/config/index.ts @@ -38,4 +38,9 @@ export const config = { component_code: `pocket-${name.toLowerCase()}`, env_code: isDev ? 'dev' : 'prod', }, + tracing: { + url: isDev + ? 
'https://otel-collector.getpocket.dev:443' + : 'https://otel-collector.readitlater.com:443', + }, }; diff --git a/infrastructure/v3-proxy-api/src/main.ts b/infrastructure/v3-proxy-api/src/main.ts index 24f8f7527..87d636b52 100644 --- a/infrastructure/v3-proxy-api/src/main.ts +++ b/infrastructure/v3-proxy-api/src/main.ts @@ -112,6 +112,10 @@ class Stack extends TerraformStack { name: 'ENVIRONMENT', value: process.env.NODE_ENV ?? 'development', // this gives us a nice lowercase production and development }, + { + name: 'OTLP_COLLECTOR_URL', + value: config.tracing.url, + }, ], secretEnvVars: [ { @@ -128,40 +132,6 @@ class Stack extends TerraformStack { }, ], }, - { - name: 'otel-collector', - containerImage: 'pocket/opentelemetry-collector-contrib', - essential: true, - logMultilinePattern: '^\\S.+', - logGroup: this.createCustomLogGroup('otel-collector'), - portMappings: [ - { - hostPort: 4138, - containerPort: 4138, - }, - { - hostPort: 4137, - containerPort: 4137, - }, - { - hostPort: 55681, - containerPort: 55681, - }, - ], - envVars: [ - { - name: 'DEPLOYMENT_ENVIRONMENT_NAME', - value: config.tags.env_code, - }, - ], - secretEnvVars: [ - { - name: 'GOOGLE_APPLICATION_CREDENTIALS_JSON', - valueFrom: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/GCP_SA_TRACES:::`, - }, - ], - repositoryCredentialsParam: `arn:aws:secretsmanager:${region.name}:${caller.accountId}:secret:Shared/DockerHub`, - }, ], codeDeploy: { useCodeDeploy: true, diff --git a/packages/sentry/src/featureFlagTraceSampler.ts b/packages/sentry/src/featureFlagTraceSampler.ts index 82fe4673e..3930c814a 100644 --- a/packages/sentry/src/featureFlagTraceSampler.ts +++ b/packages/sentry/src/featureFlagTraceSampler.ts @@ -15,6 +15,10 @@ export function featureFlagTraceSampler( flagName: string, ): NodeOptions['tracesSampler'] { return (context: SamplingContext) => { + // Continue trace decision, if there is any parentSampled information + if (context.parentSampled !== undefined) { 
+ return context.parentSampled; + } const variant = client.getVariant(flagName, context); if (variant.payload != null) { if (variant.payload.type === 'number') { diff --git a/packages/tracing/package.json b/packages/tracing/package.json index da0cc2745..45da8ecb0 100644 --- a/packages/tracing/package.json +++ b/packages/tracing/package.json @@ -69,12 +69,16 @@ "@opentelemetry/auto-instrumentations-node": "0.50.0", "@opentelemetry/context-async-hooks": "1.26.0", "@opentelemetry/core": "1.26.0", + "@opentelemetry/exporter-logs-otlp-grpc": "0.53.0", + "@opentelemetry/exporter-logs-otlp-http": "0.53.0", "@opentelemetry/exporter-metrics-otlp-grpc": "0.53.0", + "@opentelemetry/exporter-metrics-otlp-http": "0.53.0", "@opentelemetry/exporter-trace-otlp-grpc": "0.53.0", "@opentelemetry/exporter-trace-otlp-http": "0.53.0", "@opentelemetry/id-generator-aws-xray": "1.2.2", "@opentelemetry/instrumentation-knex": "0.40.0", "@opentelemetry/propagator-aws-xray": "1.26.0", + "@opentelemetry/resource-detector-aws": "1.6.1", "@opentelemetry/resources": "1.26.0", "@opentelemetry/sdk-metrics": "1.26.0", "@opentelemetry/sdk-node": "0.53.0", @@ -82,6 +86,7 @@ "@opentelemetry/sdk-trace-node": "1.26.0", "@opentelemetry/semantic-conventions": "1.27.0", "@opentelemetry/winston-transport": "0.6.0", + "@pocket-tools/ts-logger": "workspace:*", "@prisma/instrumentation": "5.19.1", "@sentry/node": "8.33.0", "@sentry/opentelemetry": "8.33.0", diff --git a/packages/tracing/src/tracing.ts b/packages/tracing/src/tracing.ts index 7c523f22c..c9cfdced8 100644 --- a/packages/tracing/src/tracing.ts +++ b/packages/tracing/src/tracing.ts @@ -1,14 +1,14 @@ import process from 'process'; -import { NodeSDK } from '@opentelemetry/sdk-node'; -import { - DiagConsoleLogger, - DiagLogLevel, - DiagLogger, - diag, -} from '@opentelemetry/api'; +import { NodeSDK, logs } from '@opentelemetry/sdk-node'; +import { DiagLogLevel, DiagLogger, diag } from '@opentelemetry/api'; import { KnexInstrumentation } from 
'@opentelemetry/instrumentation-knex'; -import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc'; +import { OTLPTraceExporter as HTTPOTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http'; +import { OTLPTraceExporter as GRPCOTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc'; + +import { OTLPLogExporter as HTTPOTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http'; +import { OTLPLogExporter as GRPCOTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-grpc'; + import { PrismaInstrumentation } from '@prisma/instrumentation'; import { Detector, Resource } from '@opentelemetry/resources'; import { @@ -17,8 +17,10 @@ import { } from '@opentelemetry/semantic-conventions'; import { SentrySampler } from '@sentry/opentelemetry'; -// import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics'; -// import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-grpc'; +import { PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics'; +import { OTLPMetricExporter as HTTPOTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http'; +import { OTLPMetricExporter as GRPCOTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-grpc'; + import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'; import { @@ -29,11 +31,15 @@ import { hostDetectorSync, processDetectorSync, } from '@opentelemetry/resources'; +import { awsEcsDetectorSync } from '@opentelemetry/resource-detector-aws'; import * as Sentry from '@sentry/node'; import type { NodeClient } from '@sentry/node'; + +import { serverLogger } from '@pocket-tools/ts-logger'; import { BatchSpanProcessor, + BufferConfig, ParentBasedSampler, } from '@opentelemetry/sdk-trace-base'; import { AWSXRayPropagator } from '@opentelemetry/propagator-aws-xray'; @@ -57,9 +63,8 @@ export type TracingConfig = { release: string; samplingRatio?: number; graphQLDepth?: number; - grpcDefaultPort?: number; - 
httpDefaultPort?: number; - host?: string; + url?: string; + protocol?: 'GRPC' | 'HTTP'; logger?: DiagLogger; sentry: NodeClient | undefined; additionalInstrumentations?: AdditionalInstrumentation[]; @@ -70,10 +75,8 @@ const tracingDefaults: TracingConfig = { release: 'unknown', sentry: undefined, graphQLDepth: 8, - grpcDefaultPort: 4317, - httpDefaultPort: 4318, - host: 'localhost', - logger: new DiagConsoleLogger(), + url: 'http://localhost:4318', + protocol: 'HTTP', additionalInstrumentations: [], }; @@ -96,6 +99,13 @@ function awaitAttributes(detector: DetectorSync): Detector { }; } +const batchConfig: BufferConfig = { + maxQueueSize: 4096, + maxExportBatchSize: 1000, + scheduledDelayMillis: 1000, + exportTimeoutMillis: 5000, +}; + /** * function to setup open-telemetry tracing config * Note: this function has to run before initial @@ -110,9 +120,6 @@ export async function nodeSDKBuilder(config: TracingConfig) { * sample apps: https://github.com/aws-observability/aws-otel-community/blob/master/sample-apps/javascript-sample-app/nodeSDK.js */ - //tracing level set for open-telemetry - diag.setLogger(config.logger ?? new DiagConsoleLogger(), DiagLogLevel.WARN); - const _resource = Resource.default().merge( new Resource({ [ATTR_SERVICE_NAME]: config.serviceName, @@ -120,22 +127,36 @@ export async function nodeSDKBuilder(config: TracingConfig) { }), ); - const _traceExporter = new OTLPTraceExporter({ - //collector url - url: `http://${config.host}:${config.grpcDefaultPort}`, + const _traceExporter = + config.protocol === 'HTTP' + ? new HTTPOTLPTraceExporter({ + //collector url + url: `${config.url}/v1/traces`, + }) + : new GRPCOTLPTraceExporter({ url: config.url }); + + const _metricReader = new PeriodicExportingMetricReader({ + exporter: + config.protocol === 'HTTP' + ? 
new HTTPOTLPMetricExporter({ + url: `${config.url}/v1/metrics`, + }) + : new GRPCOTLPMetricExporter({ url: config.url }), + // once every 60 seconds, GCP supports 1 every 5 seconds for custom metrics https://cloud.google.com/monitoring/quotas#custom_metrics_quotas + // But let's just do 60 seconds for now as we figure it out + exportIntervalMillis: 60000, }); - // const _metricReader = new PeriodicExportingMetricReader({ - // exporter: new OTLPMetricExporter({ - // url: `http://${config.host}:${config.grpcDefaultPort}`, - // }), - // exportIntervalMillis: 10000, // once every 10 seconds, GCP supports 1 every 5 seconds for custom metrics https://cloud.google.com/monitoring/quotas#custom_metrics_quotas - // }); + const _logExporter = + config.protocol === 'HTTP' + ? new HTTPOTLPLogExporter({ url: `${config.url}/v1/logs` }) + : new GRPCOTLPLogExporter({ url: config.url }); // set up the default instrumentations for all implementors const instrumentations: any[] = [ getNodeAutoInstrumentations({ '@opentelemetry/instrumentation-fs': { + // Disabling Filesystem instrumentation because it is very noisy and memory-intensive. 
enabled: false, requireParentSpan: true, }, @@ -162,7 +183,6 @@ export async function nodeSDKBuilder(config: TracingConfig) { new additionalInstrumentationConstructors[instrumentation](), ); }); - const sdk = new NodeSDK({ textMapPropagator: new AWSXRayPropagator(), instrumentations, @@ -172,20 +192,26 @@ export async function nodeSDKBuilder(config: TracingConfig) { contextManager: new Sentry.SentryContextManager(), resource: _resource, idGenerator: new AWSXRayIdGenerator(), - spanProcessors: [new BatchSpanProcessor(_traceExporter)], - traceExporter: _traceExporter, - // metricReader: _metricReader, + spanProcessors: [new BatchSpanProcessor(_traceExporter, batchConfig)], + metricReader: _metricReader, + logRecordProcessors: [ + new logs.BatchLogRecordProcessor(_logExporter, batchConfig), + ], // TODO: Remove after issue is fixed // https://github.com/open-telemetry/opentelemetry-js/issues/4638 resourceDetectors: [ awaitAttributes(envDetectorSync), awaitAttributes(hostDetectorSync), awaitAttributes(processDetectorSync), + awaitAttributes(awsEcsDetectorSync), ], }); // this enables the API to record telemetry sdk.start(); + //tracing level set for open-telemetry + // this has to happen after the OTEL is setup so that the ts-logger is patched + diag.setLogger(config.logger ?? 
serverLogger, DiagLogLevel.WARN); diag.info('Tracer successfully started'); // Validate that the setup is correct diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 138bc77a3..456ab0181 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1012,7 +1012,7 @@ importers: version: 14.0.0-beta.11 ts-jest: specifier: 29.2.5 - version: 29.2.5(@babel/core@7.24.5)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@22.5.3)(ts-node@10.9.2(@types/node@22.5.3)(typescript@5.5.4)))(typescript@5.5.4) + version: 29.2.5(@babel/core@7.25.7)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.25.7))(esbuild@0.23.0)(jest@29.7.0(@types/node@22.5.3)(ts-node@10.9.2(@types/node@22.5.3)(typescript@5.5.4)))(typescript@5.5.4) ts-node: specifier: 10.9.2 version: 10.9.2(@types/node@22.5.3)(typescript@5.5.4) @@ -1061,7 +1061,7 @@ importers: version: 14.0.0-beta.11 ts-jest: specifier: 29.2.5 - version: 29.2.5(@babel/core@7.25.7)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.25.7))(esbuild@0.23.0)(jest@29.7.0(@types/node@22.5.3)(ts-node@10.9.2(@types/node@22.5.3)(typescript@5.5.4)))(typescript@5.5.4) + version: 29.2.5(@babel/core@7.24.5)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@22.5.3)(ts-node@10.9.2(@types/node@22.5.3)(typescript@5.5.4)))(typescript@5.5.4) ts-node: specifier: 10.9.2 version: 10.9.2(@types/node@22.5.3)(typescript@5.5.4) @@ -2251,9 +2251,18 @@ importers: '@opentelemetry/core': specifier: 1.26.0 version: 1.26.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-logs-otlp-grpc': + specifier: 0.53.0 + version: 0.53.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-logs-otlp-http': + specifier: 0.53.0 + version: 0.53.0(@opentelemetry/api@1.9.0) '@opentelemetry/exporter-metrics-otlp-grpc': specifier: 0.53.0 version: 0.53.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-metrics-otlp-http': + specifier: 
0.53.0 + version: 0.53.0(@opentelemetry/api@1.9.0) '@opentelemetry/exporter-trace-otlp-grpc': specifier: 0.53.0 version: 0.53.0(@opentelemetry/api@1.9.0) @@ -2269,6 +2278,9 @@ importers: '@opentelemetry/propagator-aws-xray': specifier: 1.26.0 version: 1.26.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resource-detector-aws': + specifier: 1.6.1 + version: 1.6.1(@opentelemetry/api@1.9.0) '@opentelemetry/resources': specifier: 1.26.0 version: 1.26.0(@opentelemetry/api@1.9.0) @@ -2290,6 +2302,9 @@ importers: '@opentelemetry/winston-transport': specifier: 0.6.0 version: 0.6.0 + '@pocket-tools/ts-logger': + specifier: workspace:* + version: link:../ts-logger '@prisma/instrumentation': specifier: 5.19.1 version: 5.19.1 diff --git a/servers/client-api/config/router.yaml b/servers/client-api/config/router.yaml index af931ab75..05c12e310 100644 --- a/servers/client-api/config/router.yaml +++ b/servers/client-api/config/router.yaml @@ -123,36 +123,36 @@ telemetry: 'environment.name': '${env.APP_ENVIRONMENT}' 'service.name': 'client-api' 'environment.namespace': pocket - # metrics: - # common: - # resource: - # 'environment.name': '${env.APP_ENVIRONMENT}' - # 'service.name': 'client-api' - # 'environment.namespace': pocket - # views: - # # https://www.apollographql.com/docs/graphos/routing/security/demand-control#configuring-instrument-output - # # Define a custom view because cost is different than the default latency-oriented view of OpenTelemetry - # - name: cost.* - # aggregation: - # histogram: - # buckets: - # - 0 - # - 10 - # - 100 - # - 1000 - # - 10000 - # - 100000 - # - 1000000 - # otlp: - # enabled: false - # endpoint: 'http://${env.OTLP_COLLECTOR_HOST:-localhost}:4317' - # protocol: grpc - # batch_processor: - # scheduled_delay: 10000ms # 10 secs export due to google cloud rate limits - # max_concurrent_exports: 1000 - # max_export_batch_size: 10000 - # max_export_timeout: 100s - # max_queue_size: 10000 + metrics: + common: + resource: + 'environment.name': 
'${env.APP_ENVIRONMENT}' + 'service.name': 'client-api' + 'environment.namespace': pocket + views: + # https://www.apollographql.com/docs/graphos/routing/security/demand-control#configuring-instrument-output + # Define a custom view because cost is different than the default latency-oriented view of OpenTelemetry + - name: cost.* + aggregation: + histogram: + buckets: + - 0 + - 10 + - 100 + - 1000 + - 10000 + - 100000 + - 1000000 + otlp: + enabled: false + endpoint: '${env.OTLP_COLLECTOR_URL:-http://localhost:4318}' + protocol: http + batch_processor: + scheduled_delay: 10000ms # 10 secs export due to google cloud rate limits + max_concurrent_exports: 1000 + max_export_batch_size: 10000 + max_export_timeout: 100s + max_queue_size: 10000 tracing: common: resource: @@ -169,13 +169,13 @@ telemetry: header_name: 'pocket-trace-id' otlp: enabled: true - endpoint: 'http://${env.OTLP_COLLECTOR_HOST:-localhost}:4317' - protocol: grpc + endpoint: '${env.OTLP_COLLECTOR_URL:-http://localhost:4318}' + protocol: http batch_processor: scheduled_delay: 100ms max_concurrent_exports: 1000 max_export_batch_size: 10000 - max_export_timeout: 100s + max_export_timeout: 2s max_queue_size: 10000 apq: router: diff --git a/servers/client-api/config/supergraph.yaml b/servers/client-api/config/supergraph.yaml index 6340b4894..beafb91f4 100644 --- a/servers/client-api/config/supergraph.yaml +++ b/servers/client-api/config/supergraph.yaml @@ -1,76 +1,76 @@ federation_version: =2.7.0 subgraphs: # Pocket Monorepo - annotations-api: - routing_url: https://annotations-api.readitlater.com - # schema: - # subgraph_url: https://annotations-api.readitlater.com - schema: # Schema downloaded from GraphOS registry, does not poll for updates - graphref: pocket-client-api@current - subgraph: annotations-api + # annotations-api: + # routing_url: https://annotations-api.readitlater.com + # # schema: + # # subgraph_url: https://annotations-api.readitlater.com + # schema: # Schema downloaded from GraphOS 
registry, does not poll for updates + # graphref: pocket-client-api@current + # subgraph: annotations-api featureflags: - routing_url: https://featureflags.readitlater.com/graphql - schema: - subgraph_url: https://featureflags.readitlater.com/graphql - image-api: - routing_url: https://image-api.readitlater.com - schema: - subgraph_url: https://image-api.readitlater.com - list-api: - routing_url: https://list-api.readitlater.com - schema: - subgraph_url: https://list-api.readitlater.com - parser: - #routing_url: https://parser-graphql-wrapper.readitlater.com - #schema: - # subgraph_url: https://parser-graphql-wrapper.readitlater.com - routing_url: http://localhost:4001 - schema: - subgraph_url: http://localhost:4001 - shareable-lists-api: - routing_url: https://shareablelistsapi.readitlater.com + routing_url: https://featureflags.getpocket.dev/graphql schema: - subgraph_url: https://shareablelistsapi.readitlater.com - shares-api: - routing_url: https://shares-api.readitlater.com - schema: - subgraph_url: https://shares-api.readitlater.com - user: - routing_url: https://user-api.readitlater.com - schema: - subgraph_url: https://user-api.readitlater.com - user-list-search: - routing_url: https://user-list-search.readitlater.com/graphql - schema: - subgraph_url: https://user-list-search.readitlater.com/graphql + subgraph_url: https://featureflags.getpocket.dev/graphql + # image-api: + # routing_url: https://image-api.readitlater.com + # schema: + # subgraph_url: https://image-api.readitlater.com + # list-api: + # routing_url: https://list-api.readitlater.com + # schema: + # subgraph_url: https://list-api.readitlater.com + # parser: + # routing_url: https://parser-graphql-wrapper.getpocket.dev + # schema: + # subgraph_url: https://parser-graphql-wrapper.getpocket.dev + # routing_url: http://localhost:4001 + # schema: + # subgraph_url: http://localhost:4001 + # shareable-lists-api: + # routing_url: https://shareablelistsapi.readitlater.com + # schema: + # subgraph_url: 
https://shareablelistsapi.readitlater.com + # shares-api: + # routing_url: https://shares-api.readitlater.com + # schema: + # subgraph_url: https://shares-api.readitlater.com + # user: + # routing_url: https://user-api.readitlater.com + # schema: + # subgraph_url: https://user-api.readitlater.com + # user-list-search: + # routing_url: https://user-list-search.readitlater.com/graphql + # schema: + # subgraph_url: https://user-list-search.readitlater.com/graphql - # # Content Monorepo - collection: - routing_url: https://collection-api.readitlater.com - # schema: - # subgraph_url: https://collection-api.readitlater.com - schema: # Schema downloaded from GraphOS registry, does not poll for updates - graphref: pocket-client-api@current - subgraph: collection - curated-corpus: - routing_url: https://curated-corpus-api.readitlater.com - # schema: - # subgraph_url: https://curated-corpus-api.readitlater.com - schema: # Schema downloaded from GraphOS registry, does not poll for updates - graphref: pocket-client-api@current - subgraph: curated-corpus - # curation-tools: - # routing_url: https://curation-tools-api.readitlater.com + # # # Content Monorepo + # collection: + # routing_url: https://collection-api.readitlater.com + # # schema: + # # subgraph_url: https://collection-api.readitlater.com + # schema: # Schema downloaded from GraphOS registry, does not poll for updates + # graphref: pocket-client-api@current + # subgraph: collection + # curated-corpus: + # routing_url: https://curated-corpus-api.readitlater.com + # # schema: + # # subgraph_url: https://curated-corpus-api.readitlater.com + # schema: # Schema downloaded from GraphOS registry, does not poll for updates + # graphref: pocket-client-api@current + # subgraph: curated-corpus + # # curation-tools: + # # routing_url: https://curation-tools-api.readitlater.com + # # schema: + # # subgraph_url: https://curation-tools-api.readitlater.com + # recommendation-api: + # routing_url: 
https://recommendation-api.readitlater.com/ # schema: - # subgraph_url: https://curation-tools-api.readitlater.com - recommendation-api: - routing_url: https://recommendation-api.readitlater.com/ - schema: - subgraph_url: https://recommendation-api.readitlater.com/ - syndication: - routing_url: https://syndication-api-wrapper.readitlater.com/ - # schema: - # subgraph_url: https://syndication-api-wrapper.readitlater.com/ - schema: # Schema downloaded from GraphOS registry, does not poll for updates - graphref: pocket-client-api@current - subgraph: syndication \ No newline at end of file + # subgraph_url: https://recommendation-api.readitlater.com/ + # syndication: + # routing_url: https://syndication-api-wrapper.readitlater.com/ + # # schema: + # # subgraph_url: https://syndication-api-wrapper.readitlater.com/ + # schema: # Schema downloaded from GraphOS registry, does not poll for updates + # graphref: pocket-client-api@current + # subgraph: syndication \ No newline at end of file diff --git a/servers/list-api/src/config/index.ts b/servers/list-api/src/config/index.ts index 7e1d0b135..2a37e084f 100644 --- a/servers/list-api/src/config/index.ts +++ b/servers/list-api/src/config/index.ts @@ -123,7 +123,7 @@ export default { }, }, tracing: { - host: process.env.OTLP_COLLECTOR_HOST || 'localhost', + url: process.env.OTLP_COLLECTOR_URL || 'http://localhost:4318', release: process.env.GIT_SHA || '', serviceName, graphQLDepth: 8, diff --git a/servers/otel-collector/httpd.conf b/servers/otel-collector/httpd.conf index 545d6d2dc..470a5befa 100644 --- a/servers/otel-collector/httpd.conf +++ b/servers/otel-collector/httpd.conf @@ -3,7 +3,6 @@ server { listen 3000 default_server; listen [::]:3000 default_server; - # Everything is a 200 location /status { default_type application/json; return 200 '{"status": "ok"}'; diff --git a/servers/parser-graphql-wrapper/src/config/index.ts b/servers/parser-graphql-wrapper/src/config/index.ts index 8faf0d953..10f124fa7 100644 --- 
a/servers/parser-graphql-wrapper/src/config/index.ts +++ b/servers/parser-graphql-wrapper/src/config/index.ts @@ -14,7 +14,7 @@ export default { graphQLDepth: 8, // very permissive limit on depth tracing release: process.env.GIT_SHA || 'local', serviceName: 'parser-graphql-wrapper', - host: process.env.OTLP_COLLECTOR_HOST || 'localhost', + url: process.env.OTLP_COLLECTOR_URL || 'http://localhost:4318', }, app: { environment: process.env.NODE_ENV || 'development', diff --git a/servers/user-api/src/config/index.ts b/servers/user-api/src/config/index.ts index bb539fea6..35c694aee 100644 --- a/servers/user-api/src/config/index.ts +++ b/servers/user-api/src/config/index.ts @@ -81,7 +81,7 @@ export default { }, serviceName, tracing: { - host: process.env.OTLP_COLLECTOR_HOST || 'localhost', + url: process.env.OTLP_COLLECTOR_URL || 'http://localhost:4318', serviceName: serviceName, release: process.env.GIT_SHA || 'local', }, diff --git a/servers/user-list-search/src/config/index.ts b/servers/user-list-search/src/config/index.ts index 1408424d5..5213e49ae 100644 --- a/servers/user-list-search/src/config/index.ts +++ b/servers/user-list-search/src/config/index.ts @@ -7,7 +7,7 @@ const localAwsEndpoint = export const config = { serviceName: 'user-list-search', tracing: { - host: process.env.OTLP_COLLECTOR_HOST || 'localhost', + url: process.env.OTLP_COLLECTOR_URL || 'http://localhost:4318', release: process.env.GIT_SHA || '', serviceName: 'user-list-search', }, diff --git a/servers/v3-proxy-api/src/config/index.ts b/servers/v3-proxy-api/src/config/index.ts index f95b4b9c7..0db6fc665 100644 --- a/servers/v3-proxy-api/src/config/index.ts +++ b/servers/v3-proxy-api/src/config/index.ts @@ -13,7 +13,7 @@ export default { 9346, 7035, 15449, 22931, 23283, 53720, 60289, 70018, 73360, ], tracing: { - host: process.env.OTLP_COLLECTOR_HOST || 'localhost', + url: process.env.OTLP_COLLECTOR_URL || 'http://localhost:4318', serviceName: 'v3-api-proxy', release: process.env.GIT_SHA || 
'local', },