Skip to content

Commit

Permalink
Setup New Relic log forwarding (#345)
Browse files Browse the repository at this point in the history
* Remove BuildBuddy grpc client

* setup arm exporter

* updated tests

* optionally start arm_exporter

* setup cron for docker prune

* setup cron for required reboot

* setup log forwarding

* setup containerName and env labels for docker containers for logs
  • Loading branch information
mvgijssel authored Jun 8, 2023
1 parent 2be2586 commit ceb2029
Show file tree
Hide file tree
Showing 28 changed files with 192 additions and 813 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ jobs:
- uses: ./.github/actions/setup-bazel
with:
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD }}
- name: Run client
run: bazel run --config buildbuddy --config buildbuddy_rbe //tools/buildbuddy:client -- "Deploy Provisioner"
- run: bazel run --config buildbuddy --config buildbuddy_rbe //provisioner:deploy
env:
ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD: ${{ secrets.ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD }}
5 changes: 3 additions & 2 deletions .github/workflows/schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ jobs:
- uses: ./.github/actions/setup-bazel
with:
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD }}
- name: Run client
run: bazel run --config buildbuddy --config buildbuddy_rbe //tools/buildbuddy:client -- "Validate Provisioner"
- run: bazel run --config buildbuddy --config buildbuddy_rbe //provisioner:deploy_validate
env:
ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD: ${{ secrets.ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD }}
2 changes: 0 additions & 2 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,3 @@ use_repo(npm, "npm")
# ------------------------------------ buildifier ------------------------------------ #
# NOTE: don't upgrade to 6.1.0 because this has a bug where tests always pass
bazel_dep(name = "buildifier_prebuilt", version = "6.0.0.1")
bazel_dep(name = "rules_proto", version = "5.3.0-21.7")
bazel_dep(name = "protobuf", version = "21.7", repo_name = "com_google_protobuf")
55 changes: 0 additions & 55 deletions WORKSPACE.bzlmod
Original file line number Diff line number Diff line change
@@ -1,60 +1,5 @@
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")

# ------------------------------------ rules_go ------------------------------------ #
http_archive(
name = "io_bazel_rules_go",
sha256 = "6dc2da7ab4cf5d7bfc7c949776b1b7c733f05e56edc4bcd9022bb249d2e2a996",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.39.1/rules_go-v0.39.1.zip",
"https://github.com/bazelbuild/rules_go/releases/download/v0.39.1/rules_go-v0.39.1.zip",
],
)

# ------------------------------------ rules_proto_grpc ------------------------------------ #
http_archive(
name = "rules_proto_grpc",
sha256 = "928e4205f701b7798ce32f3d2171c1918b363e9a600390a25c876f075f1efc0a",
strip_prefix = "rules_proto_grpc-4.4.0",
urls = ["https://github.com/rules-proto-grpc/rules_proto_grpc/releases/download/4.4.0/rules_proto_grpc-4.4.0.tar.gz"],
)

load("@rules_proto_grpc//:repositories.bzl", "rules_proto_grpc_repos", "rules_proto_grpc_toolchains")

rules_proto_grpc_toolchains()

rules_proto_grpc_repos()

load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies", "rules_proto_toolchains")

rules_proto_dependencies()

rules_proto_toolchains()

load("@rules_proto_grpc//python:repositories.bzl", rules_proto_grpc_python_repos = "python_repos")

rules_proto_grpc_python_repos()

load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps")

grpc_deps()

load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps")

grpc_extra_deps()

load("@rules_python//python:pip.bzl", "pip_parse")
load("@python3//:defs.bzl", "interpreter")

pip_parse(
name = "rules_proto_grpc_py3_deps",
python_interpreter_target = interpreter,
requirements_lock = "@rules_proto_grpc//python:requirements.txt",
)

load("@rules_proto_grpc_py3_deps//:requirements.bzl", "install_deps")

install_deps()

# ------------------------------------ rules_docker ------------------------------------ #
# https://github.com/bazelbuild/rules_docker/pull/2201
http_archive(
Expand Down
20 changes: 0 additions & 20 deletions buildbuddy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,3 @@ actions:
- "*"
bazel_commands:
- "test //... @rules_task//... --config buildbuddy --config buildbuddy_rbe"

- name: "Deploy Provisioner"
user: buildbuddy
container_image: "ubuntu-20.04"
triggers:
push:
branches:
- "reserved-for-buildbuddy-api__deploy-provisioner"
bazel_commands:
- "run //provisioner:deploy --config buildbuddy --config buildbuddy_rbe"

- name: "Validate Provisioner"
user: buildbuddy
container_image: "ubuntu-20.04"
triggers:
push:
branches:
- "reserved-for-buildbuddy-api__validate-provisioner"
bazel_commands:
- "run //provisioner:deploy_validate --config buildbuddy --config buildbuddy_rbe"
5 changes: 5 additions & 0 deletions provisioner/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@ pyinfra_run(
"--data install_teleport=True",
],
data = [
"deploys/docker/files/daemon.json",
"deploys/microk8s/files/cmdline.txt",
"deploys/monitoring/files/docker-compose.yml.j2",
"deploys/monitoring/files/docker-logs-fluentbit.conf",
"deploys/monitoring/files/docker-parser-fluentbit.conf",
"deploys/monitoring/files/logging.yml",
"deploys/monitoring/files/newrelic-infra.yml.j2",
"deploys/monitoring/files/nri-prometheus-config.yaml",
"deploys/monitoring/files/otel-collector-config.yaml",
"deploys/monitoring/files/reboot.sh",
"deploys/network/files/99_config.yaml",
"deploys/teleport/files/teleport.yaml.j2",
],
Expand Down
8 changes: 8 additions & 0 deletions provisioner/deploys/docker/files/daemon.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"log-driver": "json-file",
"log-opts": {
"max-size": "10m",
"max-file": "3",
"labels": "containerName,env"
}
}
29 changes: 28 additions & 1 deletion provisioner/deploys/docker/tasks/install_docker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pyinfra.operations import apt, server, systemd
from pyinfra.operations import apt, server, systemd, files
from pyinfra import host
from pyinfra.facts.server import LsbRelease
from pyinfra.api.deploy import deploy
Expand Down Expand Up @@ -71,10 +71,37 @@ def install_docker():
_sudo=True,
)

files.put(
name="Copy Docker daemon config",
src="provisioner/deploys/docker/files/daemon.json",
dest="/etc/docker/daemon.json",
_sudo=True,
user="root",
group="root",
mode="0644",
)

systemd.service(
name="Enable the docker service",
service="docker.service",
running=True,
enabled=True,
restarted=True,
_sudo=True,
)

apt.packages(
name="Install cron",
packages=["cron"],
update=True,
cache_time=24 * 60 * 60,
_sudo=True,
)

server.crontab(
name="Prune Docker every day at 00:00",
command="docker system prune -a -f --volumes",
minute="0",
hour="0",
_sudo=True,
)
25 changes: 24 additions & 1 deletion provisioner/deploys/monitoring/files/docker-compose.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ services:
- GITHUB_EXPORTER_COLLECTOR_RUNNERS=false
- GITHUB_EXPORTER_COLLECTOR_BILLING=false
- GITHUB_EXPORTER_COLLECTOR_ADMIN=false
- SETUP_ENV={{ setup_env }}
deploy:
resources:
limits:
cpus: '0.5' # Maximum of 0.5 CPU cores
memory: '100M' # Maximum of 100 megabytes of memory
labels:
containerName: "github_exporter"
env: "{{ setup_env }}"

nri-prometheus:
container_name: nri-prometheus
Expand All @@ -36,6 +38,9 @@ services:
limits:
cpus: '0.5' # Maximum of 0.5 CPU cores
memory: '100M' # Maximum of 100 megabytes of memory
labels:
containerName: "nri-prometheus"
env: "{{ setup_env }}"

otel-collector:
container_name: otel-collector
Expand All @@ -55,3 +60,21 @@ services:
limits:
cpus: '0.5' # Maximum of 0.5 CPU cores
memory: '100M' # Maximum of 100 megabytes of memory
labels:
containerName: "otel-collector"
env: "{{ setup_env }}"

{% if is_arm %}
arm_exporter:
container_name: arm_exporter
image: carlosedp/arm_exporter:latest@sha256:c2510142e3824686cba8af75826737a8158b25648e29867e262d26f553de5211
restart: always
deploy:
resources:
limits:
cpus: '0.5' # Maximum of 0.5 CPU cores
memory: '100M' # Maximum of 100 megabytes of memory
labels:
containerName: "arm_exporter"
env: "{{ setup_env }}"
{% endif %}
27 changes: 27 additions & 0 deletions provisioner/deploys/monitoring/files/docker-logs-fluentbit.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Fluent Bit pipeline for Docker container logs.
#
# Input: tail every container log file under /var/lib/docker/containers,
# tag the records "docker-logs" and parse each line with the "docker" parser.
# Path_Key stores the source file path on each record under "filePath".
# DB is the database file the tail input uses for its bookkeeping
# (presumably read offsets, so restarts do not re-read files - confirm).
[INPUT]
Name tail
Path /var/lib/docker/containers/*/*.log
Buffer_Max_Size 128k
Mem_Buf_Limit 16384k
Skip_Long_Lines On
Path_Key filePath
Tag docker-logs
DB /var/db/newrelic-infra/newrelic-integrations/logging/fb.db
Parser docker

# Add a constant "fb.input" = "tail" field to every docker-logs record.
[FILTER]
Name record_modifier
Match docker-logs
Record fb.input tail

# Lift the keys nested under "attrs" up to the top level of the record.
# NOTE(review): presumably "attrs" holds the container labels selected by the
# "labels" log-opt in Docker's daemon.json (containerName, env) - confirm.
[FILTER]
Name nest
Match docker-logs
Operation lift
Nested_under attrs

# Disabled file output that would write docker-logs records under /tmp/docker-logs.
# NOTE(review): presumably kept for local debugging of this pipeline - confirm
# before removing.
# [OUTPUT]
# Name file
# Format out_file
# Path /tmp/docker-logs
# Match docker-logs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Fluent Bit parser named "docker": decodes each log line as JSON and takes the
# record timestamp from its "time" field, interpreted with Time_Format below.
# NOTE(review): Time_Format has a space between %L and %z; confirm it matches
# the timestamps that Docker's json-file log driver actually writes.
[PARSER]
Name docker
Format json
Time_Key time
Time_Format %Y-%m-%dT%H:%M:%S.%L %z
11 changes: 11 additions & 0 deletions provisioner/deploys/monitoring/files/logging.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Log sources for the New Relic infrastructure agent's log forwarding
# (this file is installed into /etc/newrelic-infra/logging.d/).
logs:
# Docker container logs, collected through a custom Fluent Bit config and
# parser file installed alongside this file.
- name: docker-logs
fluentbit:
config_file: /etc/newrelic-infra/logging.d/docker-logs-fluentbit.conf
parsers_file: /etc/newrelic-infra/logging.d/docker-parser-fluentbit.conf

# Log files matching /var/log/*/*.log. The glob only matches files exactly one
# directory below /var/log (e.g. not /var/log/syslog).
- name: system-logs
file: /var/log/*/*.log

# Logs of the "teleport" systemd service.
- name: systemd-teleport
systemd: teleport
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ cluster_name: "provisioner"
# standalone: true

# How often the integration should run. Defaults to 30s.
scrape_duration: "1m"
scrape_duration: "30s"

# The HTTP client timeout when fetching data from targets. Defaults to 5s.
# scrape_timeout: "5s"
scrape_timeout: "15s"

# How old must the entries used for calculating the counters delta be
# before the telemetry emitter expires them. Defaults to 5m.
Expand Down Expand Up @@ -61,6 +61,9 @@ require_scrape_enabled_label_for_nodes: true
targets:
- description: github_exporter
urls: ["http://github_exporter:9504/metrics"]

- description: arm_exporter
urls: ["http://arm_exporter:9243/metrics"]
# targets:
# - description: Secure etcd example
# urls:
Expand Down
6 changes: 6 additions & 0 deletions provisioner/deploys/monitoring/files/reboot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/sh

# Reboot the machine, but only when the reboot-required marker file exists.
# Approach taken from https://askubuntu.com/questions/829526/ubuntu-server-reboot-command-in-crontab-trigger-only-if-required

# No marker file: nothing to do, finish successfully.
test -f /var/run/reboot-required || exit 0

/sbin/shutdown -r now
59 changes: 58 additions & 1 deletion provisioner/deploys/monitoring/tasks/install_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def install_monitoring():
github_exporter_token=github_exporter_token,
new_relic_license_key=new_relic_license_key,
setup_env=host.data.setup_env,
is_arm=host.get_fact(DebArch) == "arm64",
)

nri_prometheus_config = files.put(
Expand Down Expand Up @@ -61,7 +62,7 @@ def install_monitoring():
server.shell(
name="Start the monitoring service",
commands=[
"docker compose -f /opt/monitoring/docker-compose.yml up -d --force-recreate",
"docker compose -f /opt/monitoring/docker-compose.yml up -d --force-recreate --remove-orphans",
],
_sudo=True,
)
Expand Down Expand Up @@ -113,6 +114,36 @@ def install_monitoring():
_sudo=True,
)

files.put(
name="Copy New Relic logging config",
src="provisioner/deploys/monitoring/files/logging.yml",
dest="/etc/newrelic-infra/logging.d/logging.yml",
_sudo=True,
user="root",
group="root",
mode="0644",
)

files.put(
name="Copy New Relic docker fluentbit logging config",
src="provisioner/deploys/monitoring/files/docker-logs-fluentbit.conf",
dest="/etc/newrelic-infra/logging.d/docker-logs-fluentbit.conf",
_sudo=True,
user="root",
group="root",
mode="0644",
)

files.put(
name="Copy New Relic docker fluentbit parser",
src="provisioner/deploys/monitoring/files/docker-parser-fluentbit.conf",
dest="/etc/newrelic-infra/logging.d/docker-parser-fluentbit.conf",
_sudo=True,
user="root",
group="root",
mode="0644",
)

systemd.service(
name="Enable the New Relic service",
service="newrelic-infra.service",
Expand All @@ -121,3 +152,29 @@ def install_monitoring():
enabled=True,
_sudo=True,
)

apt.packages(
name="Install cron",
packages=["cron"],
update=True,
cache_time=24 * 60 * 60,
_sudo=True,
)

files.put(
name="Copy reboot script",
src="provisioner/deploys/monitoring/files/reboot.sh",
dest="/opt/monitoring/reboot.sh",
_sudo=True,
user="root",
group="root",
mode="0744",
)

server.crontab(
name="Reboot at 01:00 when required",
command="/opt/monitoring/reboot.sh",
minute="0",
hour="1",
_sudo=True,
)
Loading

0 comments on commit ceb2029

Please sign in to comment.