diff --git a/ansible/deploy-clickhouse.yml b/ansible/deploy-clickhouse.yml new file mode 100644 index 00000000..c2d34cc7 --- /dev/null +++ b/ansible/deploy-clickhouse.yml @@ -0,0 +1,13 @@ +--- +- name: Deploy oonidata clickhouse hosts + hosts: + - notebook.ooni.org + - data1.htz-fsn.prod.ooni.nu + #- data2.htz-fsn.prod.ooni.nu + - data3.htz-fsn.prod.ooni.nu + become: true + tags: + - clickhouse + roles: + - prometheus_node_exporter + - oonidata_clickhouse diff --git a/ansible/deploy-monitoring.yml b/ansible/deploy-monitoring.yml new file mode 100644 index 00000000..a1eadee9 --- /dev/null +++ b/ansible/deploy-monitoring.yml @@ -0,0 +1,12 @@ +--- +- name: Update monitoring config + hosts: monitoring.ooni.org + become: true + tags: + - monitoring + roles: + - prometheus + - prometheus_blackbox_exporter + - prometheus_alertmanager + + diff --git a/ansible/deploy-tier0.yml b/ansible/deploy-tier0.yml new file mode 100644 index 00000000..ffe68c02 --- /dev/null +++ b/ansible/deploy-tier0.yml @@ -0,0 +1,19 @@ +--- +- name: Include monitoring playbook + ansible.builtin.import_playbook: deploy-monitoring.yml + +- name: Include clickhouse playbook + ansible.builtin.import_playbook: deploy-clickhouse.yml + +- name: Deploy oonidata worker nodes + hosts: + - data1.htz-fsn.prod.ooni.nu + become: true + tags: + - oonidata_worker + roles: + - oonidata + vars: + enable_jupyterhub: false + enable_oonipipeline_worker: true + clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni" diff --git a/ansible/deploy-tier2.yml b/ansible/deploy-tier2.yml new file mode 100644 index 00000000..8f87a663 --- /dev/null +++ b/ansible/deploy-tier2.yml @@ -0,0 +1,25 @@ +--- +- name: Setup OpenVPN server + hosts: openvpn-server1.ooni.io + become: true + remote_user: root + roles: + - ssh_users + +- name: Deploy notebook host + hosts: notebook.ooni.org + become: 
true + tags: + - notebook + vars: + enable_oonipipeline_worker: false + roles: + - oonidata + +# commented out due to the fact it requires manual config of ~/.ssh/config +#- name: Setup codesign box +# hosts: codesign-box +# become: true +# remote_user: ubuntu +# roles: +# - codesign_box diff --git a/ansible/group_vars/all/vars.yml b/ansible/group_vars/all/vars.yml index 17712861..d18687cd 100644 --- a/ansible/group_vars/all/vars.yml +++ b/ansible/group_vars/all/vars.yml @@ -27,3 +27,5 @@ admin_usernames: [ art, mehul ] root_usernames: [ art, mehul ] non_admin_usernames: [ ] deactivated_usernames: [ sbs, federico, sarath ] + +prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_prod') }}" diff --git a/ansible/group_vars/clickhouse/vars.yml b/ansible/group_vars/clickhouse/vars.yml index 8e7388e8..f1ac5248 100644 --- a/ansible/group_vars/clickhouse/vars.yml +++ b/ansible/group_vars/clickhouse/vars.yml @@ -7,6 +7,8 @@ nftables_clickhouse_allow: ip: 168.119.7.188 - fqdn: notebook.ooni.org ip: 138.201.19.39 + - fqdn: clickhouseproxy.dev.ooni.io + ip: "{{ lookup('dig', 'clickhouseproxy.dev.ooni.io/A') }}" nftables_zookeeper_allow: - fqdn: data1.htz-fsn.prod.ooni.nu @@ -24,7 +26,7 @@ clickhouse_config: max_connections: 4096 keep_alive_timeout: 3 max_concurrent_queries: 100 - max_server_memory_usage: 0 + max_server_memory_usage: 21001001000 max_thread_pool_size: 10000 max_server_memory_usage_to_ram_ratio: 0.9 total_memory_profiler_step: 4194304 @@ -154,6 +156,10 @@ clickhouse_distributed_ddl: clickhouse_default_profiles: default: readonly: 2 + max_memory_usage: 11001001000 + use_uncompressed_cache: 0 + load_balancing: random + max_partitions_per_insert_block: 100 readonly: readonly: 1 write: @@ -194,3 +200,17 @@ clickhouse_default_quotas: result_rows: 0 read_rows: 0 execution_time: 0 + +clickhouse_prometheus: + endpoint: "/metrics" + port: 9363 + metrics: true + events: true + 
asynchronous_metrics: true + status_info: true + +prometheus_nginx_proxy_config: + - location: /metrics/node_exporter + proxy_pass: http://127.0.0.1:8100/metrics + - location: /metrics/clickhouse + proxy_pass: http://127.0.0.1:9363/metrics diff --git a/ansible/inventory b/ansible/inventory index 25f1f5df..a44f8d45 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -1,22 +1,24 @@ -[all] -# This requires manual setup of ~/.ssh/config -#codesign-box +[all:children] +htz-fsn +ghs-ams -[prod] -data.ooni.org -oonidata.ooni.org -monitoring.ooni.org -openvpn-server1.ooni.io +## Role tags + +[clickhouse] notebook.ooni.org data1.htz-fsn.prod.ooni.nu data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu -[dev] -oonidatatest.ooni.nu +## Location tags -[clickhouse] +[htz-fsn] +data.ooni.org +monitoring.ooni.org notebook.ooni.org data1.htz-fsn.prod.ooni.nu data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu + +[ghs-ams] +openvpn-server1.ooni.io diff --git a/ansible/playbook.yml b/ansible/playbook.yml index 63d2b448..17bcd402 100644 --- a/ansible/playbook.yml +++ b/ansible/playbook.yml @@ -7,63 +7,8 @@ tags: - bootstrap -- name: Update monitoring config - hosts: monitoring.ooni.org - become: true - tags: - - monitoring - roles: - - prometheus - - prometheus_blackbox_exporter - - prometheus_alertmanager - -- name: Setup OpenVPN server - hosts: openvpn-server1.ooni.io - become: true - remote_user: root - roles: - - ssh_users - -- name: Deploy oonidata clickhouse hosts - hosts: - - data1.htz-fsn.prod.ooni.nu - #- data2.htz-fsn.prod.ooni.nu - - data3.htz-fsn.prod.ooni.nu - - notebook.ooni.org - become: true - tags: - - clickhouse - roles: - #- tailnet - - oonidata_clickhouse - -- name: Deploy oonidata worker nodes - hosts: - - data1.htz-fsn.prod.ooni.nu - become: true - tags: - - oonidata_worker - roles: - - oonidata - vars: - enable_jupyterhub: false - enable_oonipipeline_worker: true - clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_ssm', 
'/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni" - -- name: Deploy notebook host - hosts: notebook.ooni.org - become: true - tags: - - notebook - vars: - enable_oonipipeline_worker: false - roles: - - oonidata +- name: Include tier0 playbook + ansible.builtin.import_playbook: deploy-tier0.yml -# commented out due to the fact it requires manual config of ~/.ssh/config -#- name: Setup codesign box -# hosts: codesign-box -# become: true -# remote_user: ubuntu -# roles: -# - codesign_box +- name: Include tier2 playbook + ansible.builtin.import_playbook: deploy-tier2.yml diff --git a/ansible/requirements.yml b/ansible/requirements.yml index 0a2eae7d..52ae85ea 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -1,7 +1,6 @@ - src: willshersystems.sshd - src: nginxinc.nginx - src: geerlingguy.certbot -- src: geerlingguy.node_exporter - src: artis3n.tailscale - src: https://github.com/idealista/clickhouse_role scm: git diff --git a/ansible/roles/bootstrap/tasks/main.yml b/ansible/roles/bootstrap/tasks/main.yml index ecf1d46f..500d58ff 100644 --- a/ansible/roles/bootstrap/tasks/main.yml +++ b/ansible/roles/bootstrap/tasks/main.yml @@ -55,11 +55,6 @@ tags: - nftables -- ansible.builtin.include_role: - name: prometheus_node_exporter - tags: - - node_exporter - - name: Configure journald tags: - journald diff --git a/ansible/roles/nginx/defaults/main.yml b/ansible/roles/nginx/defaults/main.yml new file mode 100644 index 00000000..4c0ac11a --- /dev/null +++ b/ansible/roles/nginx/defaults/main.yml @@ -0,0 +1 @@ +nginx_user: nginx diff --git a/ansible/roles/nginx/templates/nginx.conf b/ansible/roles/nginx/templates/nginx.conf index f43bf7c5..7b1b594c 100644 --- a/ansible/roles/nginx/templates/nginx.conf +++ b/ansible/roles/nginx/templates/nginx.conf @@ -1,122 +1,61 @@ -# NB: system nginx uses `www-data` user! 
-user nginx; -worker_processes 2; +# Managed by ansible +# roles/nginx/templates/nginx.conf +# -error_log /var/log/nginx/error.log warn; -pid /var/run/nginx.pid; +user {{ nginx_user }}; +worker_processes auto; +pid /run/nginx.pid; +include /etc/nginx/modules-enabled/*.conf; events { - worker_connections 1024; + worker_connections 768; + # multi_accept on; } http { - include /etc/nginx/mime.types; - default_type application/octet-stream; - - geo $is_ooni { - # TODO: this is not implemented ATM - default 0; - } - - map $http_x_request_id $has_request_id { # check for `X-Request-ID` - "" 0; - default 1; - } - - map "$is_ooni:$has_request_id" $ooni_request_id { - "1:1" $http_x_request_id; # use `X-Request-ID` if it's okay - default $request_id; - } - - # IPv4 is anonymized to /24, IPv6 to /48 - according to OONI Data Policy. - # https://ooni.torproject.org/about/data-policy/ - # IP is recorded to track possible abusers, not to distinguish users, so the - # address is truncated down to ISP (min routable prefix) instead of hashing. - map $remote_addr $ooni_remote_addr { - default "0.0.0.0"; - # variables in map value require nginx/1.11.0+ - "~(?P<ip>\d+\.\d+\.\d+)\.\d+" "$ip.0"; - # :: means at least TWO zero 16bit fields, https://tools.ietf.org/html/rfc5952#section-4.2.2 - "~(?P<ip>[0-9a-f]+:[0-9a-f]+:[0-9a-f]+):[0-9a-f:]+" "$ip::"; - "~(?P<ip>[0-9a-f]+:[0-9a-f]+)::[0-9a-f:]+" "$ip::"; - "~(?P<ip>[0-9a-f]+)::[0-9a-f:]+" "$ip::"; - } - - # $server_name is important as mtail does not distinguish log lines from - # different files, $host is required to log actual `Host` header. - # $request is split into separate fields to ease awk and mtail parsing. - # $scheme is used instead of $https to ease eye-reading. - # TCP_INFO is logged for random fun. 
- log_format mtail_pub - '$time_iso8601\t$msec\t$server_name\t' - '$ooni_remote_addr\t' # pub/int diff - '$request_completion\t$request_time\t$status\t$bytes_sent\t$body_bytes_sent\t' - '$upstream_cache_status\t$upstream_addr\t$upstream_status\t$upstream_connect_time\t$upstream_header_time\t$upstream_response_time\t' - '$scheme\t$server_protocol\t$request_length\t$request_method\t$host\t$request_uri\t' - '$tcpinfo_rtt\t$tcpinfo_rttvar\t' - '$http_referer\t$http_user_agent\t$ooni_request_id'; - - log_format mtail_int - '$time_iso8601\t$msec\t$server_name\t' - '$remote_addr\t' # pub/int diff - '$request_completion\t$request_time\t$status\t$bytes_sent\t$body_bytes_sent\t' - '$upstream_cache_status\t$upstream_addr\t$upstream_status\t$upstream_connect_time\t$upstream_header_time\t$upstream_response_time\t' - '$scheme\t$server_protocol\t$request_length\t$request_method\t$host\t$request_uri\t' - '$tcpinfo_rtt\t$tcpinfo_rttvar\t' - '$http_referer\t$http_user_agent\t$ooni_request_id'; - - log_format oolog '$ooni_remote_addr - $remote_user [$time_local] ' - '"$request" $status $body_bytes_sent ' - '"$http_referer" "$http_user_agent" "$host"'; - - log_format oolog_mtail '$time_iso8601\t$msec\t$server_name\t' - '$ooni_remote_addr\t' # pub/int diff - '$request_completion\t$request_time\t$status\t$bytes_sent\t$body_bytes_sent\t' - '$upstream_cache_status\t$upstream_addr\t$upstream_status\t$upstream_connect_time\t$upstream_header_time\t$upstream_response_time\t' - '$scheme\t$server_protocol\t$request_length\t$request_method\t$host\t$request_uri\t' - '$tcpinfo_rtt\t$tcpinfo_rttvar\t' - '$http_referer\t$http_user_agent\t$ooni_request_id'; - - access_log /var/log/nginx/access.log mtail_int; - - sendfile on; - tcp_nopush on; # TCP_CORK HTTP headers with sendfile() body into single packet - - keepalive_timeout 120 120; # Firefox has 115s, http://kb.mozillazine.org/Network.http.keep-alive.timeout - - server_tokens off; - - # SSL based on 
https://wiki.mozilla.org/Security/Server_Side_TLS (doc v4.1) - ssl_session_timeout 1d; - ssl_session_cache shared:GLOBAL:1m; # 1m of cache is ~4000 sessions - ssl_session_tickets off; # needs accurate key rotation - ssl_dhparam /etc/nginx/ffdhe2048_dhparam.pem; # https://tools.ietf.org/html/rfc7919 - ssl_prefer_server_ciphers on; - #TODO: ssl_stapling on; # needs `resolver` or `ssl_stapling_file` - #TODO: ssl_stapling_verify on; # needs `ssl_trusted_certificate` - #TODO: resolver ; - # Define in server{} - # - include /etc/nginx/ssl_modern.conf | /etc/nginx/ssl_intermediate.conf - # - ssl_certificate /etc/letsencrypt/live/example.org/fullchain.pem; - # - ssl_certificate_key /etc/letsencrypt/live/example.org/privkey.pem - # - ssl_trusted_certificate /etc/letsencrypt/live/example.org/chain.pem; # for ssl_stapling_verify - # - add_header Strict-Transport-Security max-age=15768000; # HSTS (15768000 seconds = 6 months) - ### - - gzip on; - gzip_types text/html text/plain text/css text/xml text/javascript application/x-javascript application/json application/xml; # default is only `text/html` - gzip_disable "msie6"; - #gzip_proxied any; - - # Host, X-Real-IP, X-Forwarded-For, X-Forwarded-Proto are from - # file /etc/nginx/proxy_params from nginx-common package - # NB: adding `proxy_set_header` in another location overwrites whole set! 
- proxy_set_header Host $http_host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Request-ID $ooni_request_id; - - include /etc/nginx/conf.d/*.conf; - include /etc/nginx/sites-enabled/*; + + # Basic Settings + + sendfile on; + tcp_nopush on; # TCP_CORK HTTP headers with sendfile() body into single packet + types_hash_max_size 2048; + # server_tokens off; + + # server_names_hash_bucket_size 64; + # server_name_in_redirect off; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Logging Settings + + # anonymize ipaddr: IPv4 keeps the first three octets (last octet set to 0), IPv6 keeps the first two groups; the named capture "ip" feeds $ip below + map $remote_addr $remote_addr_anon { + ~(?P<ip>\d+\.\d+\.\d+)\. $ip.0; + ~(?P<ip>[^:]+:[^:]+): $ip::; + default 0.0.0.0; + } + + # log anonymized ipaddr and caching status + log_format ooni_nginx_fmt '$remote_addr_anon $upstream_cache_status [$time_local] ' + '"$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"'; + + access_log syslog:server=unix:/dev/log ooni_nginx_fmt; + error_log syslog:server=unix:/dev/log; + + # Gzip Settings + + gzip on; + + # gzip_vary on; + # gzip_proxied any; + # gzip_comp_level 6; + # gzip_buffers 16 8k; + # gzip_http_version 1.1; + # gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; + + # Virtual Host Configs + + include /etc/nginx/conf.d/*.conf; + include /etc/nginx/sites-enabled/*; } diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index e8f9cd30..f4111045 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -151,6 +151,30 @@ scrape_configs: - targets: - backend-fsn.ooni.org:9363 + - job_name: 'clickhouse cluster' + scrape_interval: 5s + scheme: http + metrics_path: "/metrics/clickhouse" + basic_auth: + username: 'prom' + password: '{{ 
prometheus_metrics_password_prod }}' + static_configs: + - targets: + - data1.htz-fsn.prod.ooni.nu:9100 + - data3.htz-fsn.prod.ooni.nu:9100 + + - job_name: 'node' + scrape_interval: 5s + scheme: http + metrics_path: "/metrics/node_exporter" + basic_auth: + username: 'prom' + password: '{{ prometheus_metrics_password_prod }}' + static_configs: + - targets: + - data1.htz-fsn.prod.ooni.nu:9100 + - data3.htz-fsn.prod.ooni.nu:9100 + # See ansible/roles/ooni-backend/tasks/main.yml for the scraping targets - job_name: 'haproxy' scrape_interval: 5s diff --git a/ansible/roles/prometheus_node_exporter/defaults/main.yml b/ansible/roles/prometheus_node_exporter/defaults/main.yml new file mode 100644 index 00000000..3433498f --- /dev/null +++ b/ansible/roles/prometheus_node_exporter/defaults/main.yml @@ -0,0 +1,16 @@ +prometheus_nginx_proxy_config: + - location: /metrics/node_exporter + proxy_pass: http://127.0.0.1:8100/metrics + +node_exporter_version: '1.8.2' +node_exporter_arch: 'amd64' +node_exporter_download_url: https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_version }}/node_exporter-{{ node_exporter_version }}.linux-{{ node_exporter_arch }}.tar.gz + +node_exporter_bin_path: /usr/local/bin/node_exporter +node_exporter_host: 'localhost' +node_exporter_port: 8100 +node_exporter_options: '' + +node_exporter_state: started +node_exporter_enabled: true +node_exporter_restart: on-failure diff --git a/ansible/roles/prometheus_node_exporter/handlers/main.yml b/ansible/roles/prometheus_node_exporter/handlers/main.yml index 69a5b2fe..4ec66003 100644 --- a/ansible/roles/prometheus_node_exporter/handlers/main.yml +++ b/ansible/roles/prometheus_node_exporter/handlers/main.yml @@ -13,3 +13,8 @@ ansible.builtin.systemd_service: name: nginx state: restarted + +- name: restart node_exporter + service: + name: node_exporter + state: restarted diff --git a/ansible/roles/prometheus_node_exporter/tasks/install.yml 
b/ansible/roles/prometheus_node_exporter/tasks/install.yml new file mode 100644 index 00000000..2ad7ccd7 --- /dev/null +++ b/ansible/roles/prometheus_node_exporter/tasks/install.yml @@ -0,0 +1,60 @@ +--- +- name: Check current node_exporter version. + command: "{{ node_exporter_bin_path }} --version" + failed_when: false + changed_when: false + register: node_exporter_version_check + +- name: Download and unarchive node_exporter into temporary location. + unarchive: + src: "{{ node_exporter_download_url }}" + dest: /tmp + remote_src: true + mode: 0755 + when: > + node_exporter_version_check.stdout is not defined + or node_exporter_version not in node_exporter_version_check.stdout + register: node_exporter_download_check + +- name: Move node_exporter binary into place. + copy: + src: "/tmp/node_exporter-{{ node_exporter_version }}.linux-{{ node_exporter_arch }}/node_exporter" + dest: "{{ node_exporter_bin_path }}" + mode: 0755 + remote_src: true + notify: restart node_exporter + when: > + node_exporter_download_check is changed + or node_exporter_version_check.stdout | length == 0 + +- name: Create node_exporter user. + user: + name: node_exporter + shell: /sbin/nologin + state: present + +- name: Copy the node_exporter systemd unit file. + template: + src: node_exporter.service.j2 + dest: /etc/systemd/system/node_exporter.service + mode: 0644 + register: node_exporter_service + +- name: Reload systemd daemon if unit file is changed. + systemd: + daemon_reload: true + notify: restart node_exporter + when: node_exporter_service is changed + +- name: Ensure node_exporter is running and enabled at boot. + service: + name: node_exporter + state: "{{ node_exporter_state }}" + enabled: "{{ node_exporter_enabled }}" + +- name: Verify node_exporter is responding to requests. 
+ uri: + url: "http://{% if node_exporter_host !='' %}{{ node_exporter_host }}{% else %}localhost{% endif %}:{{ node_exporter_port }}/" + return_content: true + register: metrics_output + failed_when: "'Metrics' not in metrics_output.content" diff --git a/ansible/roles/prometheus_node_exporter/tasks/main.yml b/ansible/roles/prometheus_node_exporter/tasks/main.yml index 0c4fc242..cf9f8229 100644 --- a/ansible/roles/prometheus_node_exporter/tasks/main.yml +++ b/ansible/roles/prometheus_node_exporter/tasks/main.yml @@ -4,15 +4,7 @@ - nginx - node_exporter -- ansible.builtin.include_role: - name: geerlingguy.node_exporter - vars: - node_exporter_host: "localhost" - node_exporter_port: 8100 - tags: - - monitoring - - node_exporter - - config +- include_tasks: install.yml - name: create ooni configuration directory ansible.builtin.file: @@ -30,7 +22,7 @@ name: prom password: "{{ prometheus_metrics_password }}" owner: root - group: www-data + group: nginx mode: 0640 tags: - monitoring @@ -55,7 +47,7 @@ nft_rules_tcp: - name: 9100 rules: - - add rule inet filter input tcp dport 9100 counter accept comment "Incoming prometheus monitoring" + - add rule inet filter input ip saddr 5.9.112.244 tcp dport 9100 counter accept comment "clickhouse prometheus from monitoring.ooni.org" tags: - monitoring - node_exporter diff --git a/ansible/roles/prometheus_node_exporter/templates/nginx-prometheus.j2 b/ansible/roles/prometheus_node_exporter/templates/nginx-prometheus.j2 index 7d9fbab1..7e68c45c 100644 --- a/ansible/roles/prometheus_node_exporter/templates/nginx-prometheus.j2 +++ b/ansible/roles/prometheus_node_exporter/templates/nginx-prometheus.j2 @@ -7,14 +7,18 @@ server { access_log /var/log/nginx/{{ inventory_hostname }}.access.log; error_log /var/log/nginx/{{ inventory_hostname }}.log warn; - location /metrics { + {% for config in prometheus_nginx_proxy_config %} + + location {{ config['location'] }} { auth_basic "Administrator’s Area"; auth_basic_user_file 
/etc/ooni/prometheus_passwd; - proxy_pass http://127.0.0.1:8100; + proxy_pass {{ config['proxy_pass'] }}; proxy_set_header X-Real-IP $remote_addr; proxy_set_header Host $host; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; } -} \ No newline at end of file + + {% endfor %} +} diff --git a/ansible/roles/prometheus_node_exporter/templates/node_exporter.service.j2 b/ansible/roles/prometheus_node_exporter/templates/node_exporter.service.j2 new file mode 100644 index 00000000..42cb98cc --- /dev/null +++ b/ansible/roles/prometheus_node_exporter/templates/node_exporter.service.j2 @@ -0,0 +1,11 @@ +[Unit] +Description=NodeExporter + +[Service] +TimeoutStartSec=0 +User=node_exporter +ExecStart={{ node_exporter_bin_path }} --web.listen-address={{ node_exporter_host }}:{{ node_exporter_port }} {{ node_exporter_options }} +Restart={{ node_exporter_restart }} + +[Install] +WantedBy=multi-user.target diff --git a/scripts/cluster-migration/benchmark.sql b/scripts/cluster-migration/benchmark.sql new file mode 100644 index 00000000..55e06781 --- /dev/null +++ b/scripts/cluster-migration/benchmark.sql @@ -0,0 +1,55 @@ +SELECT + countIf ( + anomaly = 't' + AND confirmed = 'f' + AND msm_failure = 'f' + ) AS anomaly_count, + countIf ( + confirmed = 't' + AND msm_failure = 'f' + ) AS confirmed_count, + countIf (msm_failure = 't') AS failure_count, + countIf ( + anomaly = 'f' + AND confirmed = 'f' + AND msm_failure = 'f' + ) AS ok_count, + COUNT(*) AS measurement_count, + domain +FROM + fastpath +WHERE + measurement_start_time >= '2024-11-01' + AND measurement_start_time < '2024-11-10' + AND probe_cc = 'IT' +GROUP BY + domain; + +SELECT + COUNT(*) AS measurement_count, + domain +FROM + analysis_web_measurement +WHERE + measurement_start_time >= '2024-11-01' + AND measurement_start_time < '2024-11-10' + AND probe_cc = 'IT' +GROUP BY + domain; + +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster MODIFY +ORDER BY + ( + measurement_start_time, + probe_cc, + 
probe_asn, + domain, + measurement_uid + ); +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster ADD INDEX IF NOT EXISTS measurement_start_time_idx measurement_start_time TYPE minmax GRANULARITY 2; + +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster MATERIALIZE INDEX measurement_start_time_idx; + +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster ADD INDEX IF NOT EXISTS probe_cc_idx probe_cc TYPE minmax GRANULARITY 1; + +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster MATERIALIZE INDEX probe_cc_idx; \ No newline at end of file diff --git a/scripts/cluster-migration/db-sample.py b/scripts/cluster-migration/db-sample.py new file mode 100644 index 00000000..d4544135 --- /dev/null +++ b/scripts/cluster-migration/db-sample.py @@ -0,0 +1,33 @@ +from datetime import datetime, timedelta +import csv + +from tqdm import tqdm +from clickhouse_driver import Client as ClickhouseClient + + +START_TIME = datetime(2024, 11, 1, 0, 0, 0) +END_TIME = datetime(2024, 11, 10, 0, 0, 0) +SAMPLE_SIZE = 100 + + +def sample_to_file(table_name): # for each hour from START_TIME up to (not including) END_TIME, write at most SAMPLE_SIZE rows of table_name to "{table_name}-sample.csv" + with ClickhouseClient.from_url("clickhouse://localhost/ooni") as click, open( + f"{table_name}-sample.csv", "w", newline="" # csv module docs: files handed to csv.writer should be opened with newline="" + ) as out_file: + writer = csv.writer(out_file) + ts = START_TIME + while ts < END_TIME: + for row in click.execute_iter( + f""" + SELECT * FROM {table_name} + WHERE measurement_uid LIKE '{ts.strftime("%Y%m%d%H")}%' + ORDER BY measurement_uid LIMIT {SAMPLE_SIZE} + """ + ): + writer.writerow(row) + ts += timedelta(hours=1) + + +if __name__ == "__main__": + sample_to_file("obs_web") + sample_to_file("analysis_web_measurement") diff --git a/scripts/cluster-migration/migrate-tables.py b/scripts/cluster-migration/migrate-tables.py new file mode 100644 index 00000000..2a3d4bfb --- /dev/null +++ b/scripts/cluster-migration/migrate-tables.py @@ -0,0 +1,38 @@ +import os + +from tqdm import tqdm +from clickhouse_driver import Client as ClickhouseClient + + 
+WRITE_CLICKHOUSE_URL = os.environ["WRITE_CLICKHOUSE_URL"] + + +def stream_table(table_name, where_clause): + with ClickhouseClient.from_url("clickhouse://backend-fsn.ooni.org/") as click: + for row in click.execute_iter(f"SELECT * FROM {table_name} {where_clause}"): + yield row + + +def copy_table(table_name, where_clause): + with ClickhouseClient.from_url(WRITE_CLICKHOUSE_URL) as click_writer: + buf = [] + for row in tqdm(stream_table(table_name=table_name, where_clause=where_clause)): + buf.append(row) + if len(buf) > 50_000: + click_writer.execute(f"INSERT INTO {table_name} VALUES", buf) + buf = [] + + if len(buf) > 0: + click_writer.execute(f"INSERT INTO {table_name} VALUES", buf) + + +if __name__ == "__main__": + assert WRITE_CLICKHOUSE_URL, "WRITE_CLICKHOUSE_URL environment variable is not set" + print("## copying `fastpath` table") + copy_table("fastpath", "WHERE measurement_uid < '20241127'") + print("## copying `jsonl` table") + copy_table("jsonl", "WHERE measurement_uid < '20241127'") + print("## copying `citizenlab` table") + copy_table("citizenlab", "") + print("## copying `citizenlab_flip` table") + copy_table("citizenlab_flip", "") diff --git a/scripts/cluster-migration/schema.sql b/scripts/cluster-migration/schema.sql new file mode 100644 index 00000000..7588f060 --- /dev/null +++ b/scripts/cluster-migration/schema.sql @@ -0,0 +1,137 @@ +CREATE TABLE + ooni.jsonl ON CLUSTER oonidata_cluster ( + `report_id` String, + `input` String, + `s3path` String, + `linenum` Int32, + `measurement_uid` String, + `date` Date, + `source` String, + `update_time` DateTime64 (3) MATERIALIZED now64 () + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/jsonl/{shard}', + '{replica}', + update_time + ) +ORDER BY + (report_id, input, measurement_uid) SETTINGS index_granularity = 8192; + +CREATE TABLE + ooni.fastpath ON CLUSTER oonidata_cluster ( + `measurement_uid` String, + `report_id` String, + `input` String, + `probe_cc` LowCardinality 
(String), + `probe_asn` Int32, + `test_name` LowCardinality (String), + `test_start_time` DateTime, + `measurement_start_time` DateTime, + `filename` String, + `scores` String, + `platform` String, + `anomaly` String, + `confirmed` String, + `msm_failure` String, + `domain` String, + `software_name` String, + `software_version` String, + `control_failure` String, + `blocking_general` Float32, + `is_ssl_expected` Int8, + `page_len` Int32, + `page_len_ratio` Float32, + `server_cc` String, + `server_asn` Int8, + `server_as_name` String, + `update_time` DateTime64 (3) MATERIALIZED now64 (), + `test_version` String, + `architecture` String, + `engine_name` LowCardinality (String), + `engine_version` String, + `test_runtime` Float32, + `blocking_type` String, + `test_helper_address` LowCardinality (String), + `test_helper_type` LowCardinality (String), + `ooni_run_link_id` Nullable (UInt64), + INDEX fastpath_rid_idx report_id TYPE minmax GRANULARITY 1, + INDEX measurement_uid_idx measurement_uid TYPE minmax GRANULARITY 8 + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/fastpath/{shard}', + '{replica}', + update_time + ) +ORDER BY + ( + measurement_start_time, + report_id, + input, + measurement_uid + ) SETTINGS index_granularity = 8192; + +CREATE TABLE + ooni.citizenlab ON CLUSTER oonidata_cluster ( + `domain` String, + `url` String, + `cc` FixedString (32), + `category_code` String + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/citizenlab/{shard}', + '{replica}' + ) +ORDER BY + (domain, url, cc, category_code) SETTINGS index_granularity = 4; + +CREATE TABLE + ooni.citizenlab_flip ON CLUSTER oonidata_cluster ( + `domain` String, + `url` String, + `cc` FixedString (32), + `category_code` String + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/citizenlab_flip/{shard}', + '{replica}' + ) +ORDER BY + (domain, url, cc, category_code) SETTINGS index_granularity = 4; + +CREATE 
TABLE + analysis_web_measurement ON CLUSTER oonidata_cluster ( + `domain` String, + `input` String, + `test_name` String, + `probe_asn` UInt32, + `probe_as_org_name` String, + `probe_cc` String, + `resolver_asn` UInt32, + `resolver_as_cc` String, + `network_type` String, + `measurement_start_time` DateTime64 (3, 'UTC'), + `measurement_uid` String, + `ooni_run_link_id` String, + `top_probe_analysis` Nullable (String), + `top_dns_failure` Nullable (String), + `top_tcp_failure` Nullable (String), + `top_tls_failure` Nullable (String), + `dns_blocked` Float32, + `dns_down` Float32, + `dns_ok` Float32, + `tcp_blocked` Float32, + `tcp_down` Float32, + `tcp_ok` Float32, + `tls_blocked` Float32, + `tls_down` Float32, + `tls_ok` Float32 + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/analysis_web_measurement/{shard}', + '{replica}' + ) +PARTITION BY + substring(measurement_uid, 1, 6) PRIMARY KEY measurement_uid +ORDER BY + ( + measurement_uid, + measurement_start_time, + probe_cc, + probe_asn, + domain + ) SETTINGS index_granularity = 8192; \ No newline at end of file diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 09e4636c..2b14235b 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -34,10 +34,13 @@ provider "aws" { # source_profile = oonidevops_user } -# In order for this provider to work you have to set the following environment -# variable to your DigitalOcean API token: -# DIGITALOCEAN_ACCESS_TOKEN= -provider "digitalocean" {} +data "aws_ssm_parameter" "do_token" { + name = "/oonidevops/secrets/digitalocean_access_token" +} + +provider "digitalocean" { + token = data.aws_ssm_parameter.do_token.value +} data "aws_availability_zones" "available" {} @@ -226,6 +229,10 @@ resource "aws_secretsmanager_secret_version" "oonipg_url" { ) } +data "aws_ssm_parameter" "clickhouse_readonly_url" { + name = "/oonidevops/secrets/clickhouse_readonly_url" +} + resource "random_id" "artifact_id" { 
byte_length = 4 } @@ -277,31 +284,6 @@ module "ooni_th_droplet" { dns_zone_ooni_io = local.dns_zone_ooni_io } -module "ooni_backendproxy" { - source = "../../modules/ooni_backendproxy" - - stage = local.environment - - vpc_id = module.network.vpc_id - subnet_id = module.network.vpc_subnet_public[0].id - private_subnet_cidr = module.network.vpc_subnet_private[*].cidr_block - dns_zone_ooni_io = local.dns_zone_ooni_io - - key_name = module.adm_iam_roles.oonidevops_key_name - instance_type = "t2.micro" - - backend_url = "https://backend-hel.ooni.org/" - wcth_addresses = module.ooni_th_droplet.droplet_ipv4_address - wcth_domain_suffix = "th.dev.ooni.io" - clickhouse_url = "backend-fsn.ooni.org" - clickhouse_port = "9000" - - tags = merge( - local.tags, - { Name = "ooni-tier0-backendproxy" } - ) -} - ### OONI Services Clusters module "ooniapi_cluster" { @@ -316,7 +298,7 @@ module "ooniapi_cluster" { asg_max = 6 asg_desired = 2 - instance_type = "t3a.medium" + instance_type = "t3a.micro" tags = merge( local.tags, @@ -346,8 +328,7 @@ module "ooniapi_ooniprobe_deployer" { module "ooniapi_ooniprobe" { source = "../../modules/ooniapi_service" - task_cpu = 256 - task_memory = 512 + task_memory = 64 # First run should be set on first run to bootstrap the task definition # first_run = true @@ -379,6 +360,86 @@ module "ooniapi_ooniprobe" { ) } +#### OONI Backend proxy service + +module "ooniapi_reverseproxy_deployer" { + source = "../../modules/ooniapi_service_deployer" + + service_name = "reverseproxy" + repo = "ooni/backend" + branch_name = "master" + buildspec_path = "ooniapi/services/reverseproxy/buildspec.yml" + codestar_connection_arn = aws_codestarconnections_connection.oonidevops.arn + + codepipeline_bucket = aws_s3_bucket.ooniapi_codepipeline_bucket.bucket + + ecs_service_name = module.ooniapi_reverseproxy.ecs_service_name + ecs_cluster_name = module.ooniapi_cluster.cluster_name +} + +module "ooniapi_reverseproxy" { + source = "../../modules/ooniapi_service" + + 
task_memory = 64 + + # First run should be set on first run to bootstrap the task definition + # first_run = true + + vpc_id = module.network.vpc_id + public_subnet_ids = module.network.vpc_subnet_public[*].id + private_subnet_ids = module.network.vpc_subnet_private[*].id + + service_name = "reverseproxy" + default_docker_image_url = "ooni/api-reverseproxy:latest" + stage = local.environment + dns_zone_ooni_io = local.dns_zone_ooni_io + key_name = module.adm_iam_roles.oonidevops_key_name + ecs_cluster_id = module.ooniapi_cluster.cluster_id + + task_secrets = { + PROMETHEUS_METRICS_PASSWORD = aws_secretsmanager_secret_version.prometheus_metrics_password.arn + } + + task_environment = { + TARGET_URL = "https://backend-hel.ooni.org/" + } + + ooniapi_service_security_groups = [ + module.ooniapi_cluster.web_security_group_id + ] + + tags = merge( + local.tags, + { Name = "ooni-tier0-reverseproxy" } + ) +} + +module "ooni_backendproxy" { + source = "../../modules/ooni_backendproxy" + + stage = local.environment + + vpc_id = module.network.vpc_id + subnet_id = module.network.vpc_subnet_public[0].id + private_subnet_cidr = module.network.vpc_subnet_private[*].cidr_block + dns_zone_ooni_io = local.dns_zone_ooni_io + + key_name = module.adm_iam_roles.oonidevops_key_name + instance_type = "t3a.nano" + + backend_url = "https://backend-fsn.ooni.org/" + wcth_addresses = module.ooni_th_droplet.droplet_ipv4_address + wcth_domain_suffix = "th.ooni.org" + clickhouse_url = "clickhouse1.prod.ooni.io" + clickhouse_port = "9000" + + tags = merge( + local.tags, + { Name = "ooni-tier0-backendproxy" } + ) +} + + #### OONI Run service @@ -400,8 +461,7 @@ module "ooniapi_oonirun_deployer" { module "ooniapi_oonirun" { source = "../../modules/ooniapi_service" - task_cpu = 256 - task_memory = 512 + task_memory = 64 vpc_id = module.network.vpc_id public_subnet_ids = module.network.vpc_subnet_public[*].id @@ -438,7 +498,7 @@ module "ooniapi_oonifindings_deployer" { service_name = "oonifindings" 
repo = "ooni/backend" - branch_name = "master" + branch_name = "oonidata" buildspec_path = "ooniapi/services/oonifindings/buildspec.yml" codestar_connection_arn = aws_codestarconnections_connection.oonidevops.arn @@ -451,8 +511,7 @@ module "ooniapi_oonifindings_deployer" { module "ooniapi_oonifindings" { source = "../../modules/ooniapi_service" - task_cpu = 256 - task_memory = 512 + task_memory = 64 vpc_id = module.network.vpc_id public_subnet_ids = module.network.vpc_subnet_public[*].id @@ -469,6 +528,7 @@ module "ooniapi_oonifindings" { POSTGRESQL_URL = aws_secretsmanager_secret_version.oonipg_url.arn JWT_ENCRYPTION_KEY = aws_secretsmanager_secret_version.jwt_secret.arn PROMETHEUS_METRICS_PASSWORD = aws_secretsmanager_secret_version.prometheus_metrics_password.arn + CLICKHOUSE_URL = data.aws_ssm_parameter.clickhouse_readonly_url.arn } ooniapi_service_security_groups = [ @@ -502,8 +562,7 @@ module "ooniapi_ooniauth_deployer" { module "ooniapi_ooniauth" { source = "../../modules/ooniapi_service" - task_cpu = 256 - task_memory = 512 + task_memory = 64 vpc_id = module.network.vpc_id public_subnet_ids = module.network.vpc_subnet_public[*].id @@ -559,7 +618,7 @@ module "ooniapi_frontend" { vpc_id = module.network.vpc_id subnet_ids = module.network.vpc_subnet_public[*].id - oonibackend_proxy_target_group_arn = module.ooni_backendproxy.alb_target_group_id + oonibackend_proxy_target_group_arn = module.ooniapi_reverseproxy.alb_target_group_id ooniapi_oonirun_target_group_arn = module.ooniapi_oonirun.alb_target_group_id ooniapi_ooniauth_target_group_arn = module.ooniapi_ooniauth.alb_target_group_id ooniapi_ooniprobe_target_group_arn = module.ooniapi_ooniprobe.alb_target_group_id @@ -595,7 +654,7 @@ locals { } resource "aws_route53_record" "ooniapi_frontend_main" { - name = local.ooniapi_frontend_main_domain_name + name = local.ooniapi_frontend_main_domain_name zone_id = local.ooniapi_frontend_main_domain_name_zone_id type = "A" diff --git 
a/tf/modules/ooni_backendproxy/main.tf b/tf/modules/ooni_backendproxy/main.tf index ad5b9bec..81c98ee0 100644 --- a/tf/modules/ooni_backendproxy/main.tf +++ b/tf/modules/ooni_backendproxy/main.tf @@ -10,13 +10,6 @@ resource "aws_security_group" "nginx_sg" { vpc_id = var.vpc_id - ingress { - protocol = "tcp" - from_port = 80 - to_port = 80 - cidr_blocks = ["0.0.0.0/0"] - } - ingress { protocol = "tcp" from_port = 9000 @@ -132,7 +125,7 @@ resource "aws_lb_target_group_attachment" "oonibackend_proxy" { resource "aws_route53_record" "clickhouse_proxy_alias" { zone_id = var.dns_zone_ooni_io - name = "clickhouse.${var.stage}.ooni.io" + name = "clickhouseproxy.${var.stage}.ooni.io" type = "CNAME" ttl = 300 diff --git a/tf/modules/ooniapi_frontend/main.tf b/tf/modules/ooniapi_frontend/main.tf index c72937a2..d65f3b9d 100644 --- a/tf/modules/ooniapi_frontend/main.tf +++ b/tf/modules/ooniapi_frontend/main.tf @@ -182,7 +182,12 @@ resource "aws_lb_listener_rule" "ooniapi_oonifindings_rule" { condition { path_pattern { - values = ["/api/v1/incidents/*"] + values = [ + "/api/v1/incidents/*", + "/api/v1/aggregation/*", + "/api/v1/observations", + "/api/v1/analysis", + ] } } } diff --git a/tf/modules/ooniapi_service/main.tf b/tf/modules/ooniapi_service/main.tf index ad429a01..c5def884 100644 --- a/tf/modules/ooniapi_service/main.tf +++ b/tf/modules/ooniapi_service/main.tf @@ -40,11 +40,6 @@ resource "aws_cloudwatch_log_group" "ooniapi_service" { name = "ooni-ecs-group/${local.name}" } - -locals { - container_port = 80 -} - // This is done to retrieve the image name of the current task definition // It's important to keep aligned the container_name and task_definitions data "aws_ecs_container_definition" "ooniapi_service_current" { @@ -59,18 +54,17 @@ resource "aws_ecs_task_definition" "ooniapi_service" { container_definitions = jsonencode([ { - cpu = var.task_cpu, + memoryReservation = var.task_memory, essential = true, image = try( 
data.aws_ecs_container_definition.ooniapi_service_current[0].image, var.default_docker_image_url ), - memory = var.task_memory, name = local.name, portMappings = [ { - containerPort = local.container_port, + containerPort = 80 } ], diff --git a/tf/modules/ooniapi_service/templates/profile_policy.json b/tf/modules/ooniapi_service/templates/profile_policy.json index 5857ee55..3a772893 100644 --- a/tf/modules/ooniapi_service/templates/profile_policy.json +++ b/tf/modules/ooniapi_service/templates/profile_policy.json @@ -35,6 +35,16 @@ "Action": "secretsmanager:ListSecrets", "Resource": "*" }, + { + "Effect": "Allow", + "Action": [ + "ssm:GetParameter", + "ssm:GetParameters", + "ssm:GetParameterHistory", + "ssm:GetParametersByPath" + ], + "Resource": "arn:aws:ssm:*" + }, { "Effect": "Allow", "Action": [ diff --git a/tf/modules/ooniapi_service/variables.tf b/tf/modules/ooniapi_service/variables.tf index f83e16d7..bda90a72 100644 --- a/tf/modules/ooniapi_service/variables.tf +++ b/tf/modules/ooniapi_service/variables.tf @@ -44,13 +44,8 @@ variable "service_desired_count" { default = 1 } -variable "task_cpu" { - default = 256 - description = "https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#task_size" -} - variable "task_memory" { - default = 512 + default = 64 description = "https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#task_size" } @@ -79,4 +74,4 @@ variable "task_environment" { variable "ooniapi_service_security_groups" { description = "the shared web security group from the ecs cluster" type = list(string) -} +} \ No newline at end of file