Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setup New Relic log forwarding #345

Merged
merged 12 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ jobs:
- uses: ./.github/actions/setup-bazel
with:
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD }}
- name: Run client
run: bazel run --config buildbuddy --config buildbuddy_rbe //tools/buildbuddy:client -- "Deploy Provisioner"
- run: bazel run --config buildbuddy --config buildbuddy_rbe //provisioner:deploy
env:
ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD: ${{ secrets.ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD }}
5 changes: 3 additions & 2 deletions .github/workflows/schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ jobs:
- uses: ./.github/actions/setup-bazel
with:
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD }}
- name: Run client
run: bazel run --config buildbuddy --config buildbuddy_rbe //tools/buildbuddy:client -- "Validate Provisioner"
- run: bazel run --config buildbuddy --config buildbuddy_rbe //provisioner:deploy_validate
env:
ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD: ${{ secrets.ONEPASSWORD_SERVICE_ACCOUNT_TOKEN_PROD }}
2 changes: 0 additions & 2 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,3 @@ use_repo(npm, "npm")
# ------------------------------------ buildifier ------------------------------------ #
# NOTE: don't upgrade to 6.1.0 because this has a bug where tests always pass
bazel_dep(name = "buildifier_prebuilt", version = "6.0.0.1")
bazel_dep(name = "rules_proto", version = "5.3.0-21.7")
bazel_dep(name = "protobuf", version = "21.7", repo_name = "com_google_protobuf")
55 changes: 0 additions & 55 deletions WORKSPACE.bzlmod
Original file line number Diff line number Diff line change
@@ -1,60 +1,5 @@
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")

# ------------------------------------ rules_go ------------------------------------ #
http_archive(
name = "io_bazel_rules_go",
sha256 = "6dc2da7ab4cf5d7bfc7c949776b1b7c733f05e56edc4bcd9022bb249d2e2a996",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.39.1/rules_go-v0.39.1.zip",
"https://github.com/bazelbuild/rules_go/releases/download/v0.39.1/rules_go-v0.39.1.zip",
],
)

# ------------------------------------ rules_proto_grpc ------------------------------------ #
http_archive(
name = "rules_proto_grpc",
sha256 = "928e4205f701b7798ce32f3d2171c1918b363e9a600390a25c876f075f1efc0a",
strip_prefix = "rules_proto_grpc-4.4.0",
urls = ["https://github.com/rules-proto-grpc/rules_proto_grpc/releases/download/4.4.0/rules_proto_grpc-4.4.0.tar.gz"],
)

load("@rules_proto_grpc//:repositories.bzl", "rules_proto_grpc_repos", "rules_proto_grpc_toolchains")

rules_proto_grpc_toolchains()

rules_proto_grpc_repos()

load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies", "rules_proto_toolchains")

rules_proto_dependencies()

rules_proto_toolchains()

load("@rules_proto_grpc//python:repositories.bzl", rules_proto_grpc_python_repos = "python_repos")

rules_proto_grpc_python_repos()

load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps")

grpc_deps()

load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps")

grpc_extra_deps()

load("@rules_python//python:pip.bzl", "pip_parse")
load("@python3//:defs.bzl", "interpreter")

pip_parse(
name = "rules_proto_grpc_py3_deps",
python_interpreter_target = interpreter,
requirements_lock = "@rules_proto_grpc//python:requirements.txt",
)

load("@rules_proto_grpc_py3_deps//:requirements.bzl", "install_deps")

install_deps()

# ------------------------------------ rules_docker ------------------------------------ #
# https://github.com/bazelbuild/rules_docker/pull/2201
http_archive(
Expand Down
20 changes: 0 additions & 20 deletions buildbuddy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,3 @@ actions:
- "*"
bazel_commands:
- "test //... @rules_task//... --config buildbuddy --config buildbuddy_rbe"

- name: "Deploy Provisioner"
user: buildbuddy
container_image: "ubuntu-20.04"
triggers:
push:
branches:
- "reserved-for-buildbuddy-api__deploy-provisioner"
bazel_commands:
- "run //provisioner:deploy --config buildbuddy --config buildbuddy_rbe"

- name: "Validate Provisioner"
user: buildbuddy
container_image: "ubuntu-20.04"
triggers:
push:
branches:
- "reserved-for-buildbuddy-api__validate-provisioner"
bazel_commands:
- "run //provisioner:deploy_validate --config buildbuddy --config buildbuddy_rbe"
5 changes: 5 additions & 0 deletions provisioner/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@ pyinfra_run(
"--data install_teleport=True",
],
data = [
"deploys/docker/files/daemon.json",
"deploys/microk8s/files/cmdline.txt",
"deploys/monitoring/files/docker-compose.yml.j2",
"deploys/monitoring/files/docker-logs-fluentbit.conf",
"deploys/monitoring/files/docker-parser-fluentbit.conf",
"deploys/monitoring/files/logging.yml",
"deploys/monitoring/files/newrelic-infra.yml.j2",
"deploys/monitoring/files/nri-prometheus-config.yaml",
"deploys/monitoring/files/otel-collector-config.yaml",
"deploys/monitoring/files/reboot.sh",
"deploys/network/files/99_config.yaml",
"deploys/teleport/files/teleport.yaml.j2",
],
Expand Down
8 changes: 8 additions & 0 deletions provisioner/deploys/docker/files/daemon.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"log-driver": "json-file",
"log-opts": {
"max-size": "10m",
"max-file": "3",
"labels": "containerName,env"
}
}
29 changes: 28 additions & 1 deletion provisioner/deploys/docker/tasks/install_docker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pyinfra.operations import apt, server, systemd
from pyinfra.operations import apt, server, systemd, files
from pyinfra import host
from pyinfra.facts.server import LsbRelease
from pyinfra.api.deploy import deploy
Expand Down Expand Up @@ -71,10 +71,37 @@ def install_docker():
_sudo=True,
)

files.put(
name="Copy Docker daemon config",
src="provisioner/deploys/docker/files/daemon.json",
dest="/etc/docker/daemon.json",
_sudo=True,
user="root",
group="root",
mode="0644",
)

systemd.service(
name="Enable the docker service",
service="docker.service",
running=True,
enabled=True,
restarted=True,
_sudo=True,
)

apt.packages(
name="Install cron",
packages=["cron"],
update=True,
cache_time=24 * 60 * 60,
_sudo=True,
)

server.crontab(
name="Prune Docker every day at 00:00",
command="docker system prune -a -f --volumes",
minute="0",
hour="0",
_sudo=True,
)
25 changes: 24 additions & 1 deletion provisioner/deploys/monitoring/files/docker-compose.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ services:
- GITHUB_EXPORTER_COLLECTOR_RUNNERS=false
- GITHUB_EXPORTER_COLLECTOR_BILLING=false
- GITHUB_EXPORTER_COLLECTOR_ADMIN=false
- SETUP_ENV={{ setup_env }}
deploy:
resources:
limits:
cpus: '0.5' # Maximum of 0.5 CPU cores
memory: '100M' # Maximum of 100 megabytes of memory
labels:
containerName: "github_exporter"
env: "{{ setup_env }}"

nri-prometheus:
container_name: nri-prometheus
Expand All @@ -36,6 +38,9 @@ services:
limits:
cpus: '0.5' # Maximum of 0.5 CPU cores
memory: '100M' # Maximum of 100 megabytes of memory
labels:
containerName: "nri-prometheus"
env: "{{ setup_env }}"

otel-collector:
container_name: otel-collector
Expand All @@ -55,3 +60,21 @@ services:
limits:
cpus: '0.5' # Maximum of 0.5 CPU cores
memory: '100M' # Maximum of 100 megabytes of memory
labels:
containerName: "otel-collector"
env: "{{ setup_env }}"

{% if is_arm %}
arm_exporter:
container_name: arm_exporter
image: carlosedp/arm_exporter:latest@sha256:c2510142e3824686cba8af75826737a8158b25648e29867e262d26f553de5211
restart: always
deploy:
resources:
limits:
cpus: '0.5' # Maximum of 0.5 CPU cores
memory: '100M' # Maximum of 100 megabytes of memory
labels:
containerName: "arm_exporter"
env: "{{ setup_env }}"
{% endif %}
27 changes: 27 additions & 0 deletions provisioner/deploys/monitoring/files/docker-logs-fluentbit.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Fluent Bit pipeline that tails Docker's json-file container logs so the
# New Relic infrastructure agent can forward them.
# Referenced from logging.d via `config_file` (see the companion parsers file
# for the `docker` parser named below).

[INPUT]
Name tail
Path /var/lib/docker/containers/*/*.log
Buffer_Max_Size 128k
Mem_Buf_Limit 16384k
Skip_Long_Lines On
Path_Key filePath
Tag docker-logs
DB /var/db/newrelic-infra/newrelic-integrations/logging/fb.db
Parser docker

# Record which input produced the record, for debugging in New Relic.
[FILTER]
Name record_modifier
Match docker-logs
Record fb.input tail

# Lift the keys nested under `attrs` (Docker's log-opt labels, e.g.
# containerName/env from daemon.json) to the top level of the record.
[FILTER]
Name nest
Match docker-logs
Operation lift
Nested_under attrs

# Debug-only output: uncomment to also write records to local files.
# [OUTPUT]
# Name file
# Format out_file
# Path /tmp/docker-logs
# Match docker-logs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Parser for Docker's json-file log driver format, referenced as `Parser docker`
# by the tail input in docker-logs-fluentbit.conf and loaded via `parsers_file`
# in logging.yml. The time format matches Docker's RFC3339-nano timestamps.
[PARSER]
Name docker
Format json
Time_Key time
Time_Format %Y-%m-%dT%H:%M:%S.%L %z
11 changes: 11 additions & 0 deletions provisioner/deploys/monitoring/files/logging.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# New Relic infrastructure agent log-forwarding configuration
# (installed to /etc/newrelic-infra/logging.d/).
logs:
# Docker container logs, tailed via the custom Fluent Bit pipeline below.
- name: docker-logs
fluentbit:
config_file: /etc/newrelic-infra/logging.d/docker-logs-fluentbit.conf
parsers_file: /etc/newrelic-infra/logging.d/docker-parser-fluentbit.conf

# All plain-text system logs under /var/log.
- name: system-logs
file: /var/log/*/*.log

# Teleport service logs from the systemd journal.
- name: systemd-teleport
systemd: teleport
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ cluster_name: "provisioner"
# standalone: true

# How often the integration should run. Defaults to 30s.
scrape_duration: "1m"
scrape_duration: "30s"

# The HTTP client timeout when fetching data from targets. Defaults to 5s.
# scrape_timeout: "5s"
scrape_timeout: "15s"

# How old must the entries used for calculating the counters delta be
# before the telemetry emitter expires them. Defaults to 5m.
Expand Down Expand Up @@ -61,6 +61,9 @@ require_scrape_enabled_label_for_nodes: true
targets:
- description: github_exporter
urls: ["http://github_exporter:9504/metrics"]

- description: arm_exporter
urls: ["http://arm_exporter:9243/metrics"]
# targets:
# - description: Secure etcd example
# urls:
Expand Down
6 changes: 6 additions & 0 deletions provisioner/deploys/monitoring/files/reboot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/sh

# Reboot the host only when the OS has flagged that a reboot is required
# (Ubuntu creates /var/run/reboot-required after e.g. kernel updates).
# Intended to be run from cron; a no-op when the flag file is absent.
# Copied from https://askubuntu.com/questions/829526/ubuntu-server-reboot-command-in-crontab-trigger-only-if-required
if [ -f /var/run/reboot-required ]; then
/sbin/shutdown -r now
fi
59 changes: 58 additions & 1 deletion provisioner/deploys/monitoring/tasks/install_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def install_monitoring():
github_exporter_token=github_exporter_token,
new_relic_license_key=new_relic_license_key,
setup_env=host.data.setup_env,
is_arm=host.get_fact(DebArch) == "arm64",
)

nri_prometheus_config = files.put(
Expand Down Expand Up @@ -61,7 +62,7 @@ def install_monitoring():
server.shell(
name="Start the monitoring service",
commands=[
"docker compose -f /opt/monitoring/docker-compose.yml up -d --force-recreate",
"docker compose -f /opt/monitoring/docker-compose.yml up -d --force-recreate --remove-orphans",
],
_sudo=True,
)
Expand Down Expand Up @@ -113,6 +114,36 @@ def install_monitoring():
_sudo=True,
)

files.put(
name="Copy New Relic logging config",
src="provisioner/deploys/monitoring/files/logging.yml",
dest="/etc/newrelic-infra/logging.d/logging.yml",
_sudo=True,
user="root",
group="root",
mode="0644",
)

files.put(
name="Copy New Relic docker fluentbit logging config",
src="provisioner/deploys/monitoring/files/docker-logs-fluentbit.conf",
dest="/etc/newrelic-infra/logging.d/docker-logs-fluentbit.conf",
_sudo=True,
user="root",
group="root",
mode="0644",
)

files.put(
name="Copy New Relic docker fluentbit parser",
src="provisioner/deploys/monitoring/files/docker-parser-fluentbit.conf",
dest="/etc/newrelic-infra/logging.d/docker-parser-fluentbit.conf",
_sudo=True,
user="root",
group="root",
mode="0644",
)

systemd.service(
name="Enable the New Relic service",
service="newrelic-infra.service",
Expand All @@ -121,3 +152,29 @@ def install_monitoring():
enabled=True,
_sudo=True,
)

apt.packages(
name="Install cron",
packages=["cron"],
update=True,
cache_time=24 * 60 * 60,
_sudo=True,
)

files.put(
name="Copy reboot script",
src="provisioner/deploys/monitoring/files/reboot.sh",
dest="/opt/monitoring/reboot.sh",
_sudo=True,
user="root",
group="root",
mode="0744",
)

server.crontab(
name="Reboot at 01:00 when required",
command="/opt/monitoring/reboot.sh",
minute="0",
hour="1",
_sudo=True,
)
Loading