Skip to content

Commit

Permalink
Addressing feedback on PR #41 + other minor changes
Browse files Browse the repository at this point in the history
* Checked for major version in verify_python_version
* More documentation in `generate_base_dockerfile`
* Bumped version to 2.9.0
* Support passing SSL mode for Postgres connection.
* Downgraded to Python 3.11.9 since we don't want to go to Python 3.12
  before sufficient adoption.
* Remove version pinning for Amazon providers since this is covered by
  the Airflow constraints file.
* Update the `requirements.txt` used for development. Removed all but
  the requirements we want, and left the rest for pip to install
  automatically. This makes updating the file easier.
* `db_lock` method: renamed `timeout` to `timeout_ms` for clarity.
* Check for both `pip install` and `pip3 install` in
  `pip_install_check.py`.
  • Loading branch information
rafidka committed Apr 19, 2024
1 parent 43df469 commit 1bfaa23
Show file tree
Hide file tree
Showing 24 changed files with 89 additions and 251 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/quality-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@ jobs:
# gcc, libcurl-devel: For compiling pycurl (required for our Airflow setup.)
# gzip: Required by actions/checkout@v2 to gunzip the source code.
# postgresql-devel: Required for our Airflow setup.
# python3-devel: Required for building some Python modules, e.g. pycurl.
# python3: Self explanatory.
# python3.11-devel: Required for building some Python modules, e.g. pycurl.
# python3.11: Self explanatory.
# tar, wget, xz: For downloading and extracting ShellCheck
dnf update -y
dnf install -y \
gcc \
gzip \
libcurl-devel \
postgresql-devel \
python3 \
python3-devel \
python3.11 \
python3.11-devel \
tar \
wget \
xz
Expand All @@ -40,7 +40,7 @@ jobs:
uses: actions/checkout@v2

- name: Create the necessary Python virtual environments...
run: python3 ./create_venvs.py
run: python3.11 ./create_venvs.py

- name: Run quality checks...
run: python3 ./quality-checks/run_all.py
run: python3.11 ./quality-checks/run_all.py
7 changes: 4 additions & 3 deletions create_venvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@

def verify_python_version():
"""Check if the current Python version is at least 3.9."""
_major, minor, *_ = sys.version_info
if minor < 9:
print("Python 3.9 or higher is required.")
major, minor, *_ = sys.version_info

if major != 3 or minor < 11:
print("Python 3.11 or higher is required.")
sys.exit(1)


Expand Down
11 changes: 4 additions & 7 deletions images/airflow/2.8.0/Dockerfile.base.j2
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,20 @@ FROM public.ecr.aws/amazonlinux/amazonlinux:2023

# Environment variables

# Temporarily downgrading to 2.7.2 to make it easier to test the Docker image
# within Amazon MWAA since 2.7.2 is a version we support.
ENV AIRFLOW_VERSION=2.7.2
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.7.1
ENV AIRFLOW_VERSION=2.9.0

ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.2/constraints-3.11.txt"
ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt"
ENV AIRFLOW_USER_HOME=/usr/local/airflow
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
ENV MWAA_HOME=/usr/local/mwaa
ENV PYTHON_VERSION=3.11.7
ENV PYTHON_VERSION=3.11.9

# We don't want those variables to stay in the final image, so we use ARG instead of ENV.
ARG AIRFLOW_USER_LOCAL_PATH=${AIRFLOW_USER_HOME}/.local
ARG AIRFLOW_USER_LOCAL_BIN_PATH=${AIRFLOW_USER_LOCAL_PATH}/bin
ARG PATH_DEFAULT=${PATH}
ARG PATH_AIRFLOW_USER=${AIRFLOW_USER_LOCAL_BIN_PATH}:${PATH_DEFAULT}
ARG PYTHON_MD5_CHECKSUM=d96c7e134c35a8c46236f8a0e566b69c
ARG PYTHON_MD5_CHECKSUM=22ea467e7d915477152e99d5da856ddc
ARG MARIADB_DOWNLOAD_BASE_URL=https://mirror.mariadb.org/yum/11.1/fedora38-amd64/rpms
ARG MARIADB_RPM_COMMON=MariaDB-common-11.1.2-1.fc38.x86_64.rpm
ARG MARIADB_RPM_COMMON_CHECKSUM=e87371d558efa97724f3728fb214cf19
Expand Down
2 changes: 1 addition & 1 deletion images/airflow/2.8.0/Dockerfile.derivatives.j2
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM amazon-mwaa/airflow:2.8.0-base
FROM amazon-mwaa/airflow:2.9.0-base

{% if bootstrapping_scripts_dev %}

Expand Down
4 changes: 2 additions & 2 deletions images/airflow/2.8.0/Dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-02-22 19:42:44.935774
# This file was generated on 2024-04-19 02:30:48.359587
#

FROM amazon-mwaa/airflow:2.8.0-base
FROM amazon-mwaa/airflow:2.9.0-base

USER airflow

Expand Down
4 changes: 2 additions & 2 deletions images/airflow/2.8.0/Dockerfiles/Dockerfile-dev
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-02-22 19:42:44.927521
# This file was generated on 2024-04-19 02:30:48.351336
#

FROM amazon-mwaa/airflow:2.8.0-base
FROM amazon-mwaa/airflow:2.9.0-base

# Copy bootstrapping files.
COPY ./bootstrap-dev /bootstrap-dev
Expand Down
4 changes: 2 additions & 2 deletions images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-02-22 19:42:44.938417
# This file was generated on 2024-04-19 02:30:48.362267
#

FROM amazon-mwaa/airflow:2.8.0-base
FROM amazon-mwaa/airflow:2.9.0-base

USER airflow

Expand Down
4 changes: 2 additions & 2 deletions images/airflow/2.8.0/Dockerfiles/Dockerfile-explorer-dev
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-02-22 19:42:44.930305
# This file was generated on 2024-04-19 02:30:48.354110
#

FROM amazon-mwaa/airflow:2.8.0-base
FROM amazon-mwaa/airflow:2.9.0-base

# Copy bootstrapping files.
COPY ./bootstrap-dev /bootstrap-dev
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-02-22 19:42:44.941098
# This file was generated on 2024-04-19 02:30:48.364902
#

FROM amazon-mwaa/airflow:2.8.0-base
FROM amazon-mwaa/airflow:2.9.0-base

USER root

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-02-22 19:42:44.933100
# This file was generated on 2024-04-19 02:30:48.356865
#

FROM amazon-mwaa/airflow:2.8.0-base
FROM amazon-mwaa/airflow:2.9.0-base

# Copy bootstrapping files.
COPY ./bootstrap-dev /bootstrap-dev
Expand Down
13 changes: 5 additions & 8 deletions images/airflow/2.8.0/Dockerfiles/Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,27 @@
# the Jinja2-templated Dockerfile.j2 file, so you need to change that file
# instead.
#
# This file was generated on 2024-02-22 19:42:44.924226
# This file was generated on 2024-04-19 02:30:48.348008
#

FROM public.ecr.aws/amazonlinux/amazonlinux:2023

# Environment variables

# Temporarily downgrading to 2.7.2 to make it easier to test the Docker image
# within Amazon MWAA since 2.7.2 is a version we support.
ENV AIRFLOW_VERSION=2.7.2
ENV AIRFLOW_AMAZON_PROVIDERS_VERSION=8.7.1
ENV AIRFLOW_VERSION=2.9.0

ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.2/constraints-3.11.txt"
ENV AIRFLOW_CONSTRAINTS_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt"
ENV AIRFLOW_USER_HOME=/usr/local/airflow
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
ENV MWAA_HOME=/usr/local/mwaa
ENV PYTHON_VERSION=3.11.7
ENV PYTHON_VERSION=3.11.9

# We don't want those variables to stay in the final image, so we use ARG instead of ENV.
ARG AIRFLOW_USER_LOCAL_PATH=${AIRFLOW_USER_HOME}/.local
ARG AIRFLOW_USER_LOCAL_BIN_PATH=${AIRFLOW_USER_LOCAL_PATH}/bin
ARG PATH_DEFAULT=${PATH}
ARG PATH_AIRFLOW_USER=${AIRFLOW_USER_LOCAL_BIN_PATH}:${PATH_DEFAULT}
ARG PYTHON_MD5_CHECKSUM=d96c7e134c35a8c46236f8a0e566b69c
ARG PYTHON_MD5_CHECKSUM=22ea467e7d915477152e99d5da856ddc
ARG MARIADB_DOWNLOAD_BASE_URL=https://mirror.mariadb.org/yum/11.1/fedora38-amd64/rpms
ARG MARIADB_RPM_COMMON=MariaDB-common-11.1.2-1.fc38.x86_64.rpm
ARG MARIADB_RPM_COMMON_CHECKSUM=e87371d558efa97724f3728fb214cf19
Expand Down
2 changes: 1 addition & 1 deletion images/airflow/2.8.0/bin/airflow-user/safe-pip-install
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Define an array of required packages
REQUIRED_PACKAGES=(
"apache-airflow-providers-amazon[aiobotocore]==${AIRFLOW_AMAZON_PROVIDERS_VERSION}"
"apache-airflow-providers-amazon[aiobotocore]"
"apache-airflow[celery,statsd]==${AIRFLOW_VERSION}"
"celery[sqs]"
psycopg2
Expand Down
4 changes: 2 additions & 2 deletions images/airflow/2.8.0/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ python3 ../generate-dockerfiles.py
deactivate

# Build the base image.
docker build -f ./Dockerfiles/Dockerfile.base -t amazon-mwaa/airflow:2.8.0-base ./
docker build -f ./Dockerfiles/Dockerfile.base -t amazon-mwaa/airflow:2.9.0-base ./

# Build the derivatives.
for dev in "True" "False"; do
for build_type in "standard" "explorer" "explorer-privileged"; do
dockerfile_name="Dockerfile"
tag_name="amazon-mwaa/airflow:2.8.0"
tag_name="amazon-mwaa/airflow:2.9.0"

if [[ "$build_type" != "standard" ]]; then
dockerfile_name="${dockerfile_name}-${build_type}"
Expand Down
3 changes: 2 additions & 1 deletion images/airflow/2.8.0/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: "3.8"

x-airflow-common: &airflow-common
image: amazon-mwaa/airflow:2.8.0
image: amazon-mwaa/airflow:2.9.0
restart: always
environment:
# AWS credentials
Expand All @@ -20,6 +20,7 @@ x-airflow-common: &airflow-common
MWAA__DB__POSTGRES_USER: "airflow"
MWAA__DB__POSTGRES_PASSWORD: "airflow"
MWAA__DB__POSTGRES_DB: "airflow"
MWAA__DB__POSTGRES_SSLMODE: "prefer"

# SQS configuration
MWAA__SQS__CREATE_QUEUE: True
Expand Down
2 changes: 1 addition & 1 deletion images/airflow/2.8.0/explore-image.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/bash
docker container run -it amazon-mwaa/airflow:2.8.0-explorer-dev
docker container run -it amazon-mwaa/airflow:2.9.0-explorer-dev
5 changes: 2 additions & 3 deletions images/airflow/2.8.0/install_pip_packages.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/bash

AIRFLOW_AMAZON_PROVIDERS_VERSION=8.13.0
AIRFLOW_VERSION=2.8.0
AIRFLOW_VERSION=2.9.1
PYTHON_MAJOR_MINOR_VERSION=3.11

CONSTRAINT_FILE="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt"
Expand All @@ -12,5 +11,5 @@ pip3 install --constraint "${CONSTRAINT_FILE}" \
psycopg2 \
"celery[sqs]" \
"apache-airflow[celery,statsd]==${AIRFLOW_VERSION}" \
"apache-airflow-providers-amazon[aiobotocore]==${AIRFLOW_AMAZON_PROVIDERS_VERSION}" \
"apache-airflow-providers-amazon[aiobotocore]" \
watchtower
2 changes: 1 addition & 1 deletion images/airflow/2.8.0/python/mwaa/config/airflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def get_airflow_celery_config() -> Dict[str, str]:
"AIRFLOW__CELERY__CELERY_CONFIG_OPTIONS": celery_config_module_path,
"AIRFLOW__CELERY__RESULT_BACKEND": f"db+{get_db_connection_string()}",
"AIRFLOW__CELERY__WORKER_ENABLE_REMOTE_CONTROL": "False",
"AIRFLOW__CORE__EXECUTOR": "CeleryExecutor",
# Not a Celery config per-se, but is used by the Celery executor.
"AIRFLOW__CORE__EXECUTOR": "CeleryExecutor",
"AIRFLOW__OPERATORS__DEFAULT_QUEUE": get_sqs_queue_name(),
}

Expand Down
7 changes: 6 additions & 1 deletion images/airflow/2.8.0/python/mwaa/config/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,14 @@ def get_db_connection_string() -> str:
"MWAA__DB__POSTGRES_HOST",
"MWAA__DB__POSTGRES_PORT",
"MWAA__DB__POSTGRES_DB",
"MWAA__DB__POSTGRES_SSLMODE",
]
try:
(
postgres_host,
postgres_port,
postgres_db,
postgres_sslmode,
) = itemgetter(*env_vars_names)(os.environ)
(postgres_user, postgres_password) = get_db_credentials()
except Exception as e:
Expand All @@ -84,8 +86,11 @@ def get_db_connection_string() -> str:
f"following exception: {e}"
)

if not postgres_sslmode:
postgres_sslmode = 'require'

protocol = "postgresql+psycopg2"
creds = f"{postgres_user}:{postgres_password}"
addr = f"{postgres_host}:{postgres_port}"
# TODO We need to do what is the necessary to enforce 'require'.
return f"{protocol}://{creds}@{addr}/{postgres_db}?sslmode=prefer"
return f"{protocol}://{creds}@{addr}/{postgres_db}?sslmode={postgres_sslmode}"
4 changes: 3 additions & 1 deletion images/airflow/2.8.0/python/mwaa/config/sqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ def _get_queue_name_from_url(queue_url: str) -> str:
:returns The name of the queue or None if the URL is invalid.
"""
try:
# Validate the protocol.
# Validate the protocol (to flag accidental passing of the sqs://
# protocol, which is just a Celery convention rather than an
# actual protocol.)
if not queue_url.startswith("http://") and not queue_url.startswith("https://"):
raise ValueError(
f"URL {queue_url} should start with http:// or https://"
Expand Down
12 changes: 6 additions & 6 deletions images/airflow/2.8.0/python/mwaa/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def abort(err_msg: str, exit_code: int = 1):
F = TypeVar("F", bound=Callable[..., Any])


def db_lock(lock_id: int, timeout: int = 300 * 1000) -> Callable[[F], F]:
def db_lock(lock_id: int, timeout_ms: int = 300 * 1000) -> Callable[[F], F]:
"""
Generate a decorator that can be used to protect a function by a database lock.
Expand All @@ -66,22 +66,22 @@ def db_lock(lock_id: int, timeout: int = 300 * 1000) -> Callable[[F], F]:
same lock ID, only one process will be granted the lock at one time. However,
if the processes have different lock IDs, they will be granted the locks at the
same time.
:param timeout: The maximum time the process is allowed to hold the lock. After this
time expires, the lock is automatically released.
:param timeout_ms: The maximum time, in milliseconds, the process is allowed to hold
the lock. After this time expires, the lock is automatically released.
:returns A decorator that can be applied to a function to protect it with a DB lock.
"""
def decorator(func: F) -> F:
def wrapper(*args: Any, **kwargs: Any) -> Any:
func_name: str = func.__name__
db_engine: Engine = create_engine(
get_db_connection_string() # Assuming this is defined elsewhere
get_db_connection_string()
)
print(f"Obtaining lock for {func_name}...")
with db_engine.connect() as conn: # type: ignore
try:
conn.execute( # type: ignore
text("SET LOCK_TIMEOUT to :timeout"), {"timeout": timeout}
text("SET LOCK_TIMEOUT to :timeout"), {"timeout": timeout_ms}
)
conn.execute( # type: ignore
text("SELECT pg_advisory_lock(:id)"), {"id": lock_id}
Expand All @@ -93,7 +93,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
except Exception as e:
abort(
f"Failed while executing {func_name}. " + f"Error: {e}."
) # Assuming abort is defined elsewhere
)
except Exception as e:
abort(
f"Failed to obtain DB lock for {func_name}. " + f"Error: {e}."
Expand Down
Loading

0 comments on commit 1bfaa23

Please sign in to comment.