Skip to content

Commit

Permalink
updating logging on trace poller
Browse files Browse the repository at this point in the history
  • Loading branch information
danielbdias committed Sep 13, 2023
1 parent 2849031 commit 52b1f07
Show file tree
Hide file tree
Showing 12 changed files with 306 additions and 93 deletions.
58 changes: 29 additions & 29 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
version: "3.2"
services:
tracetest:
restart: unless-stopped
image: kubeshop/tracetest:${TAG:-latest}
extra_hosts:
- "host.docker.internal:host-gateway"
build:
context: .
volumes:
- type: bind
source: ./local-config/tracetest.config.yaml
target: /app/tracetest.yaml
- type: bind
source: ./local-config/tracetest.provision.yaml
target: /app/provisioning.yaml
ports:
- 11633:11633
command: --provisioning-file /app/provisioning.yaml
healthcheck:
test: ["CMD", "wget", "--spider", "localhost:11633"]
interval: 1s
timeout: 3s
retries: 60
depends_on:
postgres:
condition: service_healthy
environment:
TRACETEST_DEV: ${TRACETEST_DEV}
# tracetest:
# restart: unless-stopped
# image: kubeshop/tracetest:${TAG:-latest}
# extra_hosts:
# - "host.docker.internal:host-gateway"
# build:
# context: .
# volumes:
# - type: bind
# source: ./local-config/tracetest.config.yaml
# target: /app/tracetest.yaml
# - type: bind
# source: ./local-config/tracetest.provision.yaml
# target: /app/provisioning.yaml
# ports:
# - 11633:11633
# command: --provisioning-file /app/provisioning.yaml
# healthcheck:
# test: ["CMD", "wget", "--spider", "localhost:11633"]
# interval: 1s
# timeout: 3s
# retries: 60
# depends_on:
# postgres:
# condition: service_healthy
# environment:
# TRACETEST_DEV: ${TRACETEST_DEV}

postgres:
image: postgres:14
Expand Down Expand Up @@ -54,5 +54,5 @@ services:
- "/otel-local-config.yaml"
volumes:
- ./local-config/collector.config.yaml:/otel-local-config.yaml
depends_on:
- tracetest
# depends_on:
# - tracetest
27 changes: 27 additions & 0 deletions local-config/dogfood/collector.config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# OpenTelemetry Collector configuration for the local dogfood setup.
# Traces arrive over OTLP and are forwarded to the `jaeger` host.

receivers:
  otlp:
    protocols:
      # Empty values enable each protocol with the collector's defaults.
      grpc:
      http:

processors:
  batch:
    timeout: 100ms

  # Data sources: traces
  probabilistic_sampler:
    hash_seed: 22
    # 100 keeps every trace.
    sampling_percentage: 100

exporters:
  jaeger:
    endpoint: jaeger:14250
    tls:
      insecure: true

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [probabilistic_sampler, batch]
      exporters: [jaeger]
177 changes: 177 additions & 0 deletions local-config/dogfood/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# Supporting services for the local dogfood setup: Postgres, Jaeger, an
# OpenTelemetry collector and the Pokemon demo API with its workers.
# Tracetest itself is not defined here; the sibling tracetest.config.yaml
# reaches Postgres (5432) and the collector (4317) through the ports
# published on localhost.
version: '3'
services:
  postgres:
    image: postgres:14
    environment:
      POSTGRES_PASSWORD: postgres
      POSTGRES_USER: postgres
    healthcheck:
      # `$$` escapes Compose interpolation so the variables are expanded
      # inside the container rather than on the host.
      test: pg_isready -U "$$POSTGRES_USER" -d "$$POSTGRES_DB"
      interval: 1s
      timeout: 5s
      retries: 60
    ports:
      - "5432:5432"

  jaeger:
    image: jaegertracing/all-in-one:latest
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "--spider", "localhost:16686"]
      interval: 1s
      timeout: 3s
      retries: 60
    ports:
      # 16685 is the endpoint used by the dogfood DataStore provisioning;
      # 16686 is the port probed by the healthcheck above.
      - "16685:16685"
      - "16686:16686"

  otel-collector:
    image: otel/opentelemetry-collector:0.54.0
    command:
      - "--config"
      - "/otel-local-config.yaml"
    volumes:
      - ./collector.config.yaml:/otel-local-config.yaml
    depends_on:
      - jaeger
    ports:
      - "4317:4317"

  cache:
    image: redis:6
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 1s
      timeout: 3s
      retries: 60

  queue:
    image: rabbitmq:3.8-management
    restart: unless-stopped
    healthcheck:
      test: rabbitmq-diagnostics -q check_running
      interval: 1s
      timeout: 5s
      retries: 60

  stream:
    image: confluentinc/cp-kafka:latest-ubi8
    ports:
      - "29092:29092"
    environment:
      # Containers on the Compose network use stream:9092; clients on the
      # host use the published 127.0.0.1:29092 listener.
      - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://stream:9092,PLAINTEXT_HOST://127.0.0.1:29092
      - KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092,CONTROLLER://0.0.0.0:9093,PLAINTEXT_HOST://:29092
      # Single-node KRaft: this node is both controller and broker, so the
      # only quorum voter is node 1 on the CONTROLLER listener port.
      - KAFKA_CONTROLLER_QUORUM_VOTERS=1@0.0.0.0:9093
      - KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER
      - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
      - KAFKA_PROCESS_ROLES=controller,broker
      - KAFKA_NODE_ID=1
      - KAFKA_METADATA_LOG_SEGMENT_MS=15000
      - KAFKA_METADATA_MAX_RETENTION_MS=60000
      - KAFKA_METADATA_LOG_MAX_RECORD_BYTES_BETWEEN_SNAPSHOTS=2800
      - KAFKA_AUTO_CREATE_TOPICS_ENABLE=true
      - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
      - KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR=1
      - KAFKA_HEAP_OPTS=-Xmx200m -Xms200m
      - CLUSTER_ID=ckjPoprWQzOf0-FuNkGfFQ
    healthcheck:
      test: nc -z stream 9092
      start_period: 10s
      interval: 5s
      timeout: 10s
      retries: 10

  # The three demo-* services below run the same image; NPM_RUN_COMMAND
  # selects which process each container starts.
  demo-api:
    image: kubeshop/demo-pokemon-api:latest
    restart: unless-stopped
    pull_policy: always
    environment:
      REDIS_URL: cache
      DATABASE_URL: postgresql://postgres:postgres@postgres:5432/postgres?schema=public
      RABBITMQ_HOST: queue
      POKE_API_BASE_URL: https://pokeapi.co/api/v2
      COLLECTOR_ENDPOINT: http://otel-collector:4317
      NPM_RUN_COMMAND: api
    ports:
      - "8081:8081"
    healthcheck:
      test: ["CMD", "wget", "--spider", "localhost:8081"]
      interval: 1s
      timeout: 3s
      retries: 60
    depends_on:
      postgres:
        condition: service_healthy
      cache:
        condition: service_healthy
      queue:
        condition: service_healthy

  demo-worker:
    image: kubeshop/demo-pokemon-api:latest
    restart: unless-stopped
    pull_policy: always
    environment:
      REDIS_URL: cache
      DATABASE_URL: postgresql://postgres:postgres@postgres:5432/postgres?schema=public
      RABBITMQ_HOST: queue
      POKE_API_BASE_URL: https://pokeapi.co/api/v2
      COLLECTOR_ENDPOINT: http://otel-collector:4317
      NPM_RUN_COMMAND: worker
    depends_on:
      postgres:
        condition: service_healthy
      cache:
        condition: service_healthy
      queue:
        condition: service_healthy

  demo-rpc:
    image: kubeshop/demo-pokemon-api:latest
    restart: unless-stopped
    pull_policy: always
    environment:
      REDIS_URL: cache
      DATABASE_URL: postgresql://postgres:postgres@postgres:5432/postgres?schema=public
      RABBITMQ_HOST: queue
      POKE_API_BASE_URL: https://pokeapi.co/api/v2
      COLLECTOR_ENDPOINT: http://otel-collector:4317
      NPM_RUN_COMMAND: rpc
    ports:
      - "8082:8082"
    healthcheck:
      test: ["CMD", "lsof", "-i", "8082"]
      interval: 1s
      timeout: 3s
      retries: 60
    depends_on:
      postgres:
        condition: service_healthy
      cache:
        condition: service_healthy
      queue:
        condition: service_healthy

  demo-streaming-worker:
    image: kubeshop/demo-pokemon-api:latest
    environment:
      DATABASE_URL: postgresql://postgres:postgres@postgres:5432/postgres?schema=public
      POKE_API_BASE_URL: https://pokeapi.co/api/v2
      COLLECTOR_ENDPOINT: http://otel-collector:4317
      # NOTE(review): inside the container `localhost` is the container
      # itself and no Zipkin service is defined in this file; confirm
      # whether this value is actually used.
      ZIPKIN_URL: http://localhost:9411
      NPM_RUN_COMMAND: stream-worker
      KAFKA_BROKER: 'stream:9092'
      KAFKA_TOPIC: 'pokemon'
      KAFKA_CLIENT_ID: 'streaming-worker'
      REDIS_URL: cache
    depends_on:
      postgres:
        condition: service_healthy
      stream:
        condition: service_healthy
      cache:
        condition: service_healthy
      # The collector defines no healthcheck, so only its start is awaited.
      otel-collector:
        condition: service_started
21 changes: 21 additions & 0 deletions local-config/dogfood/tracetest.config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Tracetest server configuration for the local dogfood setup.
# Both endpoints use localhost, matching the ports published by the
# dogfood docker-compose file (Postgres 5432, collector 4317).

postgres:
  host: localhost
  user: postgres
  password: postgres
  port: 5432
  dbname: postgres
  params: sslmode=disable

telemetry:
  exporters:
    # Named exporter; referenced by `server.telemetry.exporter` below.
    collector:
      serviceName: tracetest
      sampling: 100  # 100%
      exporter:
        type: collector
        collector:
          endpoint: localhost:4317

server:
  telemetry:
    exporter: collector
9 changes: 9 additions & 0 deletions local-config/dogfood/tracetest.provision.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Provisions a Jaeger data store reached on localhost:16685, the port
# published by the dogfood docker-compose file.
type: DataStore
spec:
  name: jaeger
  type: jaeger
  jaeger:
    endpoint: localhost:16685
    tls:
      insecure: true
25 changes: 13 additions & 12 deletions server/executor/tracepollerworker/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/kubeshop/tracetest/server/analytics"
"github.com/kubeshop/tracetest/server/datastore"
"github.com/kubeshop/tracetest/server/executor"
"github.com/kubeshop/tracetest/server/model"
"github.com/kubeshop/tracetest/server/model/events"
"github.com/kubeshop/tracetest/server/pkg/pipeline"
"github.com/kubeshop/tracetest/server/resourcemanager"
Expand All @@ -30,6 +31,13 @@ type workerState struct {
inputQueue pipeline.Enqueuer[executor.Job]
}

// emitEvent sends event through the worker's event emitter. Emission is
// best-effort: a failure is written to the log and otherwise ignored, so
// callers have no error to handle.
func emitEvent(ctx context.Context, state *workerState, event model.TestRunEvent) {
	if emitErr := state.eventEmitter.Emit(ctx, event); emitErr != nil {
		log.Printf("[TracePoller] failed to emit %s event: error: %s", event.Type, emitErr.Error())
	}
}

func getTraceDB(ctx context.Context, state *workerState) (tracedb.TraceDB, error) {
ds, err := state.dsRepo.Current(ctx)
if err != nil {
Expand All @@ -51,15 +59,8 @@ func handleError(ctx context.Context, job executor.Job, err error, state *worker
jobFailed, reason := handleTraceDBError(ctx, job, err, state)

if jobFailed {
anotherErr := state.eventEmitter.Emit(ctx, events.TracePollingError(job.Test.ID, job.Run.ID, reason, err))
if anotherErr != nil {
log.Printf("[TracePoller] Test %s Run %d: fail to emit TracePollingError event: %s \n", job.Test.ID, job.Run.ID, err.Error())
}

anotherErr = state.eventEmitter.Emit(ctx, events.TraceFetchingError(job.Test.ID, job.Run.ID, err))
if anotherErr != nil {
log.Printf("[TracePoller] Test %s Run %d: fail to emit TracePollingError event: %s \n", job.Test.ID, job.Run.ID, err.Error())
}
emitEvent(ctx, state, events.TracePollingError(job.Test.ID, job.Run.ID, reason, err))
emitEvent(ctx, state, events.TraceFetchingError(job.Test.ID, job.Run.ID, err))
}
}

Expand Down Expand Up @@ -134,6 +135,6 @@ func handleDBError(err error) {
}
}

func isFirstRequest(job *executor.Job) bool {
return !job.Headers.GetBool("requeued")
}
// func isFirstRequest(job *executor.Job) bool {
// return !job.Headers.GetBool("requeued")
// }
Loading

0 comments on commit 52b1f07

Please sign in to comment.