From f87f1cefe0914d5e6db32e302579091b5cd19b8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Fri, 20 Dec 2024 17:59:12 +0100
Subject: [PATCH] Add support for deploying airflow (#132)

It's using our own fork of this idealista role available here:
https://github.com/ooni/airflow-role.

We should probably at some point upstream the changes.
---
Reviewer notes (this section sits after the "---" separator and is not part
of the commit message):

* The line structure of the patch was restored; indentation of the YAML,
  nginx and Terraform lines and the position of blank context lines were
  reconstructed from syntax and from the hunk line counts.
* group_vars/airflow/vars.yml: removed the second, identical
  airflow_webserver_secret_key entry (duplicate mapping key).
* roles/oonidata_airflow/tasks/main.yml: nginx site config mode "0655"
  changed to "0644"; "recurse: yes" changed to "recurse: true"; the short
  module name "group" changed to "ansible.builtin.group" to match the other
  tasks in the file.
* roles/oonidata_airflow/Readme.md: added the missing ";" after
  CREATE DATABASE and aligned the bullet continuation lines.
* NOTE(review): the "Set correct permissions on oonidata repo dir" task
  targets /opt/oonidata while the repo is checked out to
  /opt/airflow/oonidata. Confirm which path is intended before changing it;
  a recursive 0755 on the checkout would show up as local git mode changes.
* NOTE(review): ooni.airflow_role in requirements.yml has no version pin,
  unlike the other roles pinned in this change.
* The blob ids on the "index" lines are those of the original commit.

 ansible/deploy-airflow.yml                    |  9 ++
 ansible/deploy-tier0.yml                      | 14 +--
 ansible/group_vars/airflow/vars.yml           | 12 +++
 ansible/inventory                             |  3 +
 ansible/requirements.yml                      |  7 ++
 ansible/roles/dehydrated/meta/main.yml        |  2 +-
 ansible/roles/dehydrated/tasks/main.yml       |  8 --
 ansible/roles/nginx/tasks/main.yml            | 10 ---
 ansible/roles/oonidata_airflow/Readme.md      | 25 ++++++
 .../roles/oonidata_airflow/defaults/main.yml  |  2 +
 .../roles/oonidata_airflow/handlers/main.yml  |  4 +
 ansible/roles/oonidata_airflow/tasks/main.yml | 89 +++++++++++++++++++
 .../templates/nginx-airflow.j2                | 40 +++++++++
 tf/environments/prod/dns_records.tf           | 20 +++--
 14 files changed, 208 insertions(+), 37 deletions(-)
 create mode 100644 ansible/deploy-airflow.yml
 create mode 100644 ansible/group_vars/airflow/vars.yml
 create mode 100644 ansible/roles/oonidata_airflow/Readme.md
 create mode 100644 ansible/roles/oonidata_airflow/defaults/main.yml
 create mode 100644 ansible/roles/oonidata_airflow/handlers/main.yml
 create mode 100644 ansible/roles/oonidata_airflow/tasks/main.yml
 create mode 100644 ansible/roles/oonidata_airflow/templates/nginx-airflow.j2

diff --git a/ansible/deploy-airflow.yml b/ansible/deploy-airflow.yml
new file mode 100644
index 0000000..ebf34e4
--- /dev/null
+++ b/ansible/deploy-airflow.yml
@@ -0,0 +1,9 @@
+---
+- name: Deploy airflow frontend host
+  hosts:
+    - data1.htz-fsn.prod.ooni.nu
+  become: true
+  roles:
+    - oonidata_airflow
+  vars:
+    airflow_public_fqdn: "airflow.prod.ooni.io"
diff --git a/ansible/deploy-tier0.yml b/ansible/deploy-tier0.yml
index 7c11a8c..3657d54 100644
--- a/ansible/deploy-tier0.yml
+++ b/ansible/deploy-tier0.yml
@@ -8,15 +8,5 @@
 - name: Include clickhouse playbook
   ansible.builtin.import_playbook: deploy-clickhouse.yml
 
-- name: Deploy oonidata worker nodes
-  hosts:
-    - data1.htz-fsn.prod.ooni.nu
-  become: true
-  tags:
-    - oonidata_worker
-  roles:
-    - oonidata
-  vars:
-    enable_jupyterhub: false
-    enable_oonipipeline_worker: true
-    clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni"
+- name: Include airflow playbook
+  ansible.builtin.import_playbook: deploy-airflow.yml
diff --git a/ansible/group_vars/airflow/vars.yml b/ansible/group_vars/airflow/vars.yml
new file mode 100644
index 0000000..e494671
--- /dev/null
+++ b/ansible/group_vars/airflow/vars.yml
@@ -0,0 +1,12 @@
+airflow_admin_users:
+  - name: OONI Admin
+    username: admin
+    password: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_admin_password', profile='oonidevops_user_prod') }}"
+    role: Admin
+    firstname: Open
+    lastname: Observatory
+    email: admin@ooni.org
+airflow_fernet_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_fernet_key', profile='oonidevops_user_prod') }}"
+airflow_webserver_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_webserver_secret_key', profile='oonidevops_user_prod') }}"
+airflow_executor: "LocalExecutor"
+airflow_database_conn: "postgresql+psycopg2://airflow:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_postgresql_password', profile='oonidevops_user_prod') }}@ooni-tier0-postgres.c7mgscca82no.eu-central-1.rds.amazonaws.com/airflow"
diff --git a/ansible/inventory b/ansible/inventory
index bda9a48..1d3c41e 100644
--- a/ansible/inventory
+++ b/ansible/inventory
@@ -10,6 +10,9 @@ data1.htz-fsn.prod.ooni.nu
 data2.htz-fsn.prod.ooni.nu
 data3.htz-fsn.prod.ooni.nu
 
+[airflow]
+data1.htz-fsn.prod.ooni.nu
+
 ## Location tags
 
 [htz_fsn]
diff --git a/ansible/requirements.yml b/ansible/requirements.yml
index 52ae85e..e78d86b 100644
--- a/ansible/requirements.yml
+++ b/ansible/requirements.yml
@@ -1,8 +1,15 @@
 - src: willshersystems.sshd
+  version: v0.25.0
 - src: nginxinc.nginx
+  version: 0.24.3
 - src: geerlingguy.certbot
+  version: 5.2.0
 - src: artis3n.tailscale
+  version: v4.5.0
 - src: https://github.com/idealista/clickhouse_role
   scm: git
   version: 3.5.1
   name: idealista.clickhouse_role
+- src: https://github.com/ooni/airflow-role.git
+  scm: git
+  name: ooni.airflow_role
diff --git a/ansible/roles/dehydrated/meta/main.yml b/ansible/roles/dehydrated/meta/main.yml
index e7e996b..0e72e86 100644
--- a/ansible/roles/dehydrated/meta/main.yml
+++ b/ansible/roles/dehydrated/meta/main.yml
@@ -1,5 +1,5 @@
 ---
 dependencies:
-  - nginx-buster
+  - nginx
 
 ...
diff --git a/ansible/roles/dehydrated/tasks/main.yml b/ansible/roles/dehydrated/tasks/main.yml
index 0bfaf7c..0a84f1a 100644
--- a/ansible/roles/dehydrated/tasks/main.yml
+++ b/ansible/roles/dehydrated/tasks/main.yml
@@ -47,14 +47,6 @@
   tags: dehydrated
   shell: systemctl reload nginx.service
 
-- name: allow incoming TCP connections to Nginx on port 80
-  tags: dehydrated
-  blockinfile:
-    path: /etc/ooni/nftables/tcp/80.nft
-    create: yes
-    block: |
-      add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP"
-
 - name: reload nftables service
   tags: dehydrated
   shell: systemctl reload nftables.service
diff --git a/ansible/roles/nginx/tasks/main.yml b/ansible/roles/nginx/tasks/main.yml
index 757b42e..9af2a9b 100644
--- a/ansible/roles/nginx/tasks/main.yml
+++ b/ansible/roles/nginx/tasks/main.yml
@@ -51,13 +51,3 @@
   notify: reload nginx
   tags:
     - nginx
-
-- name: create config dir
-  ansible.builtin.file:
-    path: /etc/ooni/nftables/tcp
-    state: directory
-    owner: root
-    group: root
-    mode: 0755
-  tags:
-    - nftables
diff --git a/ansible/roles/oonidata_airflow/Readme.md b/ansible/roles/oonidata_airflow/Readme.md
new file mode 100644
index 0000000..c43a8f2
--- /dev/null
+++ b/ansible/roles/oonidata_airflow/Readme.md
@@ -0,0 +1,25 @@
+## Airflow role deployment notes
+
+There are a few pieces that are dependencies to this role running properly that
+you will have to do manually:
+
+* Setup the postgresql database and create the relevant DB and account.
+
+Be sure to give correct permissions to the airflow user. Here is a relevant snippet:
+```
+CREATE DATABASE airflow;
+CREATE ROLE airflow WITH PASSWORD '' LOGIN;
+GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow;
+GRANT ALL ON SCHEMA public TO airflow;
+```
+
+* For some reason the admin account creation is failing. This is likely a bug
+  in the upstream role. During the last deploy this was addressed by logging
+  into the host and running the create task manually:
+```
+AIRFLOW_CONFIG=/etc/airflow/airflow.cfg AIRFLOW_HOME=/opt/airflow/ /opt/airflow/bin/airflow users create --username admin --password XXX --firstname Open --lastname Observatory --role Admin --email admin@ooni.org
+```
+
+* Once the setup is complete, you will then have to login to the host using the
+  admin user and go into Admin->Configuration and add the `clickhouse_url`
+  variable
diff --git a/ansible/roles/oonidata_airflow/defaults/main.yml b/ansible/roles/oonidata_airflow/defaults/main.yml
new file mode 100644
index 0000000..c422ed2
--- /dev/null
+++ b/ansible/roles/oonidata_airflow/defaults/main.yml
@@ -0,0 +1,2 @@
+tls_cert_dir: /var/lib/dehydrated/certs
+certbot_domains_extra: []
diff --git a/ansible/roles/oonidata_airflow/handlers/main.yml b/ansible/roles/oonidata_airflow/handlers/main.yml
new file mode 100644
index 0000000..f6dda47
--- /dev/null
+++ b/ansible/roles/oonidata_airflow/handlers/main.yml
@@ -0,0 +1,4 @@
+- name: Reload nginx
+  ansible.builtin.systemd_service:
+    name: nginx
+    state: reloaded
diff --git a/ansible/roles/oonidata_airflow/tasks/main.yml b/ansible/roles/oonidata_airflow/tasks/main.yml
new file mode 100644
index 0000000..cec780a
--- /dev/null
+++ b/ansible/roles/oonidata_airflow/tasks/main.yml
@@ -0,0 +1,89 @@
+- name: Ensure Airflow group
+  ansible.builtin.group:
+    name: "airflow"
+  become: true
+
+# TODO: uncomment this section if you want to redeploy it
+# this was added after the user had already been created by the airflow_role
+# and so it's failing because it's trying to modify the user.
+#- name: Ensure Airflow user
+#  user:
+#    name: "airflow"
+#    group: "airflow"
+#    system: true
+#    shell: "/usr/sbin/nologin"
+#    createhome: "yes"
+#    home: "/opt/airflow"
+#  become: true
+
+- name: Checkout oonidata repo
+  become_user: airflow
+  ansible.builtin.git:
+    repo: 'https://github.com/ooni/data.git'
+    dest: /opt/airflow/oonidata
+    version: airflow
+
+- ansible.builtin.include_role:
+    name: ooni.airflow_role
+  tags:
+    - oonidata
+    - airflow
+  vars:
+    airflow_app_home: /opt/airflow
+    airflow_dags_folder: /opt/airflow/oonidata/dags/
+    airflow_webserver_host: "127.0.0.1"
+    airflow_webserver_port: 8080
+    airflow_webserver_base_url: "https://{{ airflow_public_fqdn }}"
+    airflow_environment_extra_vars:
+      - name: AIRFLOW_VAR_DATA_DIR
+        value: "{{ airflow_app_home }}/data_dir"
+    airflow_extra_packages:
+      - postgres
+      - virtualenv
+    airflow_services:
+      airflow_webserver:
+        service_name: airflow-webserver
+        enabled: true
+        running: true
+        state: started
+        path: airflow-webserver.service.j2
+      airflow_scheduler:
+        service_name: airflow-scheduler
+        enabled: true
+        running: true
+        state: started
+        path: airflow-scheduler.service.j2
+
+- name: Set correct permissions on oonidata repo dir
+  ansible.builtin.file:
+    path: /opt/oonidata
+    state: directory
+    mode: '0755'
+    owner: airflow
+    recurse: true
+
+- ansible.builtin.include_role:
+    name: nginx
+  tags:
+    - oonidata
+    - nginx
+
+- ansible.builtin.include_role:
+    name: dehydrated
+  tags:
+    - oonidata
+    - dehydrated
+  vars:
+    ssl_domains: "{{ [ inventory_hostname ] + [ airflow_public_fqdn ] }}"
+
+- name: Setup airflow nginx config
+  ansible.builtin.template:
+    src: nginx-airflow.j2
+    dest: /etc/nginx/sites-enabled/02-airflow
+    owner: root
+    mode: "0644"
+  notify:
+    - Reload nginx
+  tags:
+    - oonidata
+    - config
diff --git a/ansible/roles/oonidata_airflow/templates/nginx-airflow.j2 b/ansible/roles/oonidata_airflow/templates/nginx-airflow.j2
new file mode 100644
index 0000000..6c3b3fe
--- /dev/null
+++ b/ansible/roles/oonidata_airflow/templates/nginx-airflow.j2
@@ -0,0 +1,40 @@
+# ansible-managed in ooni/devops.git
+
+map $http_upgrade $connection_upgrade {
+  default upgrade;
+  '' close;
+}
+
+server {
+  listen 443 ssl http2;
+
+  include /etc/nginx/ssl_intermediate.conf;
+
+  ssl_certificate {{ tls_cert_dir }}/{{ inventory_hostname }}/fullchain.pem;
+  ssl_certificate_key {{ tls_cert_dir }}/{{ inventory_hostname }}/privkey.pem;
+  ssl_trusted_certificate {{ tls_cert_dir }}/{{ inventory_hostname }}/chain.pem;
+
+  server_name {{ airflow_public_fqdn }};
+  access_log /var/log/nginx/{{ airflow_public_fqdn }}.access.log;
+  error_log /var/log/nginx/{{ airflow_public_fqdn }}.log warn;
+
+  add_header Access-Control-Allow-Origin *;
+
+  ## Airflow reverse proxy
+  location / {
+    proxy_pass http://127.0.0.1:8080;
+
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header Host $host;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+
+    client_max_body_size 100M;
+
+    # WebSocket support
+    proxy_http_version 1.1;
+    proxy_set_header Upgrade $http_upgrade;
+    proxy_set_header Connection $connection_upgrade;
+    proxy_set_header X-Scheme $scheme;
+    proxy_buffering off;
+  }
+}
diff --git a/tf/environments/prod/dns_records.tf b/tf/environments/prod/dns_records.tf
index a24b26e..88fb03d 100644
--- a/tf/environments/prod/dns_records.tf
+++ b/tf/environments/prod/dns_records.tf
@@ -979,7 +979,7 @@ resource "aws_route53_record" "data1-htz-fsn-prod-ooni-nu-_a_" {
   records = ["142.132.254.225"]
   ttl     = "60"
   type    = "A"
-  zone_id = local.dns_root_zone_ooni_nu
+  zone_id = local.dns_zone_ooni_nu
 }
 
 resource "aws_route53_record" "data2-htz-fsn-prod-ooni-nu-_A_" {
@@ -987,7 +987,7 @@ resource "aws_route53_record" "data2-htz-fsn-prod-ooni-nu-_A_" {
   records = ["88.198.54.12"]
   ttl     = "60"
   type    = "A"
-  zone_id = local.dns_root_zone_ooni_nu
+  zone_id = local.dns_zone_ooni_nu
 }
 
 resource "aws_route53_record" "data3-htz-fsn-prod-ooni-nu-_A_" {
@@ -995,7 +995,7 @@ resource "aws_route53_record" "data3-htz-fsn-prod-ooni-nu-_A_" {
   records = ["168.119.7.188"]
   ttl     = "60"
   type    = "A"
-  zone_id = local.dns_root_zone_ooni_nu
+  zone_id = local.dns_zone_ooni_nu
 }
 
 resource "aws_route53_record" "clickhouse1-prod-ooni-io-_a_" {
@@ -1003,7 +1003,7 @@ resource "aws_route53_record" "clickhouse1-prod-ooni-io-_a_" {
   records = ["142.132.254.225"]
   ttl     = "60"
   type    = "A"
-  zone_id = local.dns_root_zone_ooni_io
+  zone_id = local.dns_zone_ooni_io
 }
 
 resource "aws_route53_record" "clickhouse2-prod-ooni-io-_A_" {
@@ -1011,7 +1011,7 @@ resource "aws_route53_record" "clickhouse2-prod-ooni-io-_A_" {
   records = ["88.198.54.12"]
   ttl     = "60"
   type    = "A"
-  zone_id = local.dns_root_zone_ooni_io
+  zone_id = local.dns_zone_ooni_io
 }
 
 resource "aws_route53_record" "clickhouse3-prod-ooni-io-_A_" {
@@ -1019,5 +1019,13 @@ resource "aws_route53_record" "clickhouse3-prod-ooni-io-_A_" {
   records = ["168.119.7.188"]
   ttl     = "60"
   type    = "A"
-  zone_id = local.dns_root_zone_ooni_io
+  zone_id = local.dns_zone_ooni_io
+}
+
+resource "aws_route53_record" "airflow-prod-ooni-io-_a_" {
+  name    = "airflow.prod.ooni.io"
+  records = ["142.132.254.225"]
+  ttl     = "60"
+  type    = "A"
+  zone_id = local.dns_zone_ooni_io
 }