Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add monitoring #118

Merged
merged 6 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions ansible/ansible-playbook
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Wrapper around ansible-playbook: runs the notify-slack role on localhost
# first, then hands the untouched arguments to ansible-playbook.
set -o errexit -o nounset

# Shell-quote the script path and every argument so the full invocation can be
# reproduced verbatim; printf leaves one trailing space, which is removed.
invocation="$(printf '%q ' "$0" "$@")"
# Exported so that child processes (the ansible runs below) can read it.
export ANSIBLE_SLACK_CMD="${invocation% }"

ansible localhost --module-name include_role --args name=notify-slack
ansible-playbook "$@"
7 changes: 7 additions & 0 deletions ansible/deploy-bootstrap.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Applies the bootstrap role to every inventory host with privilege
# escalation. The play carries the "bootstrap" tag so it can be selected or
# skipped with --tags / --skip-tags.
- name: Ensure all hosts are bootstrapped correctly
  hosts: all
  # Canonical boolean instead of the YAML 1.1 "yes" spelling.
  become: true
  roles:
    - bootstrap
  tags:
    - bootstrap
10 changes: 10 additions & 0 deletions ansible/deploy-monitoring-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# Runs the prometheus, prometheus_blackbox_exporter and
# prometheus_alertmanager roles on monitoring.ooni.org with privilege
# escalation. The play is tagged "monitoring".
- name: Update monitoring config
  hosts: monitoring.ooni.org
  become: true
  roles:
    - prometheus
    - prometheus_blackbox_exporter
    - prometheus_alertmanager
  tags:
    - monitoring
10 changes: 5 additions & 5 deletions ansible/deploy-monitoring.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
---
# NOTE(review): this hunk is rendered without +/- markers, so removed and added
# lines sit side by side (for example the two `- name:` entries and the two
# role lists below) — confirm against the real file before editing.
- name: Update monitoring config
- name: Deploy monitoring host
hosts: monitoring.ooni.org
become: true
tags:
- monitoring
roles:
- prometheus
- prometheus_blackbox_exporter
- prometheus_alertmanager

- monitoring
vars:
# Read from the AWS SSM parameter /oonidevops/secrets/monitoring_htpasswd using
# the oonidevops_user_prod profile.
monitoring_htpasswd: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/monitoring_htpasswd', profile='oonidevops_user_prod') }}"

# Pulls in the plays defined in deploy-monitoring-config.yml.
- ansible.builtin.import_playbook: deploy-monitoring-config.yml
13 changes: 9 additions & 4 deletions ansible/inventory
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Child groups of `all`.
# NOTE(review): the hyphenated and underscored names look like the old and new
# spelling of the same groups in a diff rendered without +/- markers — confirm.
[all:children]
htz-fsn
ghs-ams
htz_fsn
ghs_ams

## Role tags

Expand All @@ -12,13 +12,18 @@ data3.htz-fsn.prod.ooni.nu

## Location tags

# NOTE(review): each group below shows two headers (hyphenated, then
# underscored); presumably the old and new header of the same group in a diff
# rendered without +/- markers — confirm against the real inventory.
[htz-fsn]
[htz_fsn]
data.ooni.org
monitoring.ooni.org
notebook.ooni.org
data1.htz-fsn.prod.ooni.nu
data2.htz-fsn.prod.ooni.nu
data3.htz-fsn.prod.ooni.nu

[ghs-ams]
[ghs_ams]
openvpn-server1.ooni.io
amsmatomo.ooni.nu
db-1.proteus.ooni.io
ams-slack-1.ooni.org
# The two hosts below are commented out and therefore not part of the group.
#mia-echoth.ooni.nu
#mia-httpth.ooni.nu
9 changes: 2 additions & 7 deletions ansible/playbook.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
---
# NOTE(review): diff rendered without +/- markers. The inline bootstrap play
# below matches the content of deploy-bootstrap.yml and sits next to the
# import of that file; presumably the inline play is the removed side — confirm.
- name: Ensure all hosts are bootstrapped correctly
hosts: all
become: yes
roles:
- bootstrap
tags:
- bootstrap
# Imports the plays defined in deploy-bootstrap.yml.
- name: Include bootstrap playbook
ansible.builtin.import_playbook: deploy-bootstrap.yml

# Imports the plays defined in deploy-tier0.yml.
- name: Include tier0 playbook
ansible.builtin.import_playbook: deploy-tier0.yml
Expand Down
1 change: 1 addition & 0 deletions ansible/roles/monitoring/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Role default: log ingestion is off unless overridden.
# NOTE(review): presumably gates the tasks in tasks/log-ingestion.yml — confirm
# against the role's tasks/main.yml.
enable_log_ingestion: false
56 changes: 56 additions & 0 deletions ansible/roles/monitoring/files/create_logs_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- Table for journal entries exported as JSON: one column per journal field
-- (upper-case / underscore-prefixed names) plus a few lower-case columns.
-- Rows are ordered by the journal's realtime timestamp.
CREATE TABLE IF NOT EXISTS default.logs
(
    `CODE_FILE` String,
    `CODE_FUNC` String,
    `CODE_LINE` String,
    `INVOCATION_ID` String,
    `LOGGER` LowCardinality(String),
    `MESSAGE_ID` String,
    `MESSAGE` String,
    `PRIORITY` UInt8,
    `PROCESS_NAME` String,
    `SYSLOG_FACILITY` LowCardinality(String),
    `SYSLOG_IDENTIFIER` LowCardinality(String),
    `SYSLOG_PID` Nullable(UInt64),
    `SYSLOG_TIMESTAMP` String,
    `THREAD_NAME` String,
    `TID` UInt64,
    `UNIT` String,
    `_AUDIT_LOGINUID` Nullable(UInt64),
    `_AUDIT_SESSION` Nullable(UInt64),
    `_BOOT_ID` String,
    `_CAP_EFFECTIVE` String,
    `_CMDLINE` String,
    `_COMM` LowCardinality(String),
    `_EXE` LowCardinality(String),
    -- Plain UInt32: ClickHouse refuses LowCardinality over small fixed-size
    -- numeric types unless allow_suspicious_low_cardinality_types is enabled,
    -- which would make this CREATE fail on a default-configured server.
    `_GID` UInt32,
    `_HOSTNAME` String,
    `_KERNEL_DEVICE` String,
    `_KERNEL_SUBSYSTEM` String,
    `_MACHINE_ID` String,
    `_PID` UInt32,
    `_SELINUX_CONTEXT` String,
    `_SOURCE_MONOTONIC_TIMESTAMP` Nullable(Int64),
    `_SOURCE_REALTIME_TIMESTAMP` Int64,
    `_STREAM_ID` String,
    `_SYSTEMD_CGROUP` LowCardinality(String),
    `_SYSTEMD_INVOCATION_ID` String,
    `_SYSTEMD_SLICE` String,
    `_SYSTEMD_UNIT` LowCardinality(String),
    `_TRANSPORT` LowCardinality(String),
    `_UDEV_SYSNAME` String,
    -- Same reasoning as `_GID`: plain UInt32 instead of LowCardinality(UInt32).
    `_UID` UInt32,
    `__CURSOR` String,
    `__MONOTONIC_TIMESTAMP` Nullable(Int64),
    `__REALTIME_TIMESTAMP` Int64,
    -- ALIAS columns are computed on read from the microsecond timestamps.
    `date` DateTime64(6) ALIAS fromUnixTimestamp64Micro(_SOURCE_REALTIME_TIMESTAMP),
    `host` LowCardinality(String),
    -- Filled in by the server at insert time when the row does not supply it.
    `inserted_at` DateTime DEFAULT now(),
    `message` String,
    `rtdate` DateTime64(6) ALIAS fromUnixTimestamp64Micro(__REALTIME_TIMESTAMP),
    `timestamp` String,
    INDEX timestamp_minmax_idx timestamp TYPE minmax GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY __REALTIME_TIMESTAMP
SETTINGS index_granularity = 8192
17 changes: 17 additions & 0 deletions ansible/roles/monitoring/files/log-ingestion.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# systemd unit that streams the local journal into ClickHouse.
# NOTE(review): no After=/Requires= ordering and no Restart= policy are
# declared in this unit — confirm that is intended.
[Unit]
Description=log ingestion

[Service]
# Follows the journal in JSON output mode and pipes it to clickhouse-client,
# which inserts the stream into the `logs` table as JSONEachRow. Unknown JSON
# fields are skipped, and an allowed error ratio of 1 means malformed rows do
# not abort the insert.
ExecStart=/bin/sh -c 'journalctl -ojson -f | clickhouse-client --query="INSERT INTO logs FORMAT JSONEachRow" --input_format_skip_unknown_fields=1 --input_format_allow_errors_ratio=1'

# The leading "~" inverts the list: the named system-call groups are denied.
SystemCallFilter=~@clock @debug @cpu-emulation @keyring @module @mount @obsolete @raw-io @reboot @swap
# Sandboxing directives restricting what the service process can access.
NoNewPrivileges=yes
PrivateDevices=yes
PrivateTmp=yes
ProtectHome=yes
ProtectSystem=full
ProtectKernelModules=yes
ProtectKernelTunables=yes

[Install]
# Started as part of the normal multi-user boot target once enabled.
WantedBy=multi-user.target
85 changes: 85 additions & 0 deletions ansible/roles/monitoring/tasks/log-ingestion.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# # Vector

# Enables the "vector" extrepo entry and then updates it; a shell is needed
# because the two commands are chained with &&.
- name: vector - enable repo
  ansible.builtin.shell:
    cmd: extrepo enable vector && extrepo update vector
  tags: vector

# Installs the vector package from apt.
- name: vector - install pkg
  tags: vector
  ansible.builtin.apt:
    # Force an apt cache refresh so a newly enabled repository is visible.
    # `cache_valid_time: 0` on its own is falsy and does not trigger a refresh.
    update_cache: true
    name:
      - vector

# Copies the role's create_logs_table.sql onto the host, next to the
# ClickHouse server configuration.
- name: vector - deploy SQL file to create logs table
  ansible.builtin.copy:
    dest: /etc/clickhouse-server/create_logs_table.sql
    src: create_logs_table.sql
  tags: vector

# Feeds the deployed SQL file to clickhouse-client; the statement in that file
# uses CREATE TABLE IF NOT EXISTS.
- name: vector - create vector_logs table
  ansible.builtin.command:
    cmd: >-
      clickhouse-client --multiline --multiquery
      --queries-file /etc/clickhouse-server/create_logs_table.sql
  tags: vector

# Builds TLS material on the control machine: a CA certificate derived from
# the vaulted CA key, a fresh node key, and a node certificate signed by that
# CA. All output files are written to relative paths.
- name: vector - Generate syslog certificates
tags: vector
# runs locally
delegate_to: 127.0.0.1
# Step 1: self-signed CA certificate (oonicacert.pem); the CA key is piped in
#         on stdin from `./vault view`.
# Step 2: new unencrypted 2048-bit RSA key (node.key) plus a signing request
#         (node-req.pem).
# Step 3: sign the request with the CA certificate/key, giving node-cert.pem.
# NOTE(review): no changed_when is set, and shell tasks report "changed" on
# every successful run, so certs_ready.changed is presumably always true —
# confirm whether that is intended.
shell: |
./vault view files/pusher_ca.key.vault | openssl req -x509 -new -nodes -key /dev/stdin -sha256 -days 3650 -subj '/O=OONI/OU=CA/CN=ooni.org' -out oonicacert.pem
openssl req -newkey rsa:2048 -nodes -days 3650 -keyout node.key -out node-req.pem -subj '/CN=ooni.org/O=OONI temp CA/C=US' -batch
./vault view files/pusher_ca.key.vault | openssl x509 -req -days 3650 -set_serial 01 -in node-req.pem -out node-cert.pem -CA oonicacert.pem -CAkey /dev/stdin
register: certs_ready

# Uploads the CA certificate, node certificate and node key to /etc/vector/,
# owned by the vector user with mode 0440. Runs only when the registered
# certs_ready result reports a change.
- name: vector - Copy TLS certs
  tags: vector
  when: certs_ready.changed
  ansible.builtin.copy:
    dest: /etc/vector/
    src: "{{ item }}"
    owner: vector
    mode: "0440"
  loop:
    - oonicacert.pem
    - node-cert.pem
    - node.key

# Removes the four certificate/key files listed below on the control machine.
- name: vector - Delete files
  tags: vector
  # runs locally
  delegate_to: 127.0.0.1
  ansible.builtin.file:
    state: absent
    path: "{{ item }}"
  loop:
    - node-cert.pem
    - node-req.pem
    - node.key
    - oonicacert.pem

# Renders templates/vector.toml to /etc/vector/vector.toml.
- name: vector - configure
  ansible.builtin.template:
    dest: /etc/vector/vector.toml
    src: templates/vector.toml
  tags: vector

# Copies the 10514.nft rule file into /etc/ooni/nftables/tcp/ and records
# whether the file changed in nft_reload_needed.
- name: vector - open port
  register: nft_reload_needed
  ansible.builtin.copy:
    dest: /etc/ooni/nftables/tcp/
    src: templates/10514.nft
  tags: vector

# Reloads nftables, but only when the registered nft_reload_needed result
# reports a change. Uses the systemd module rather than a raw `systemctl`
# shell call; per the module documentation, `reloaded` also starts the unit
# if it is not currently running.
- name: vector - reload nft
  tags: vector
  ansible.builtin.systemd:
    name: nftables.service
    state: reloaded
  when: nft_reload_needed.changed

# Reloads systemd unit definitions, enables vector.service at boot and
# restarts it on every run.
- name: vector - restart service
  ansible.builtin.systemd:
    name: vector.service
    state: restarted
    enabled: true
    daemon_reload: true
  tags: vector
Loading
Loading