From 11bafa3ff5692f1a8a07f384b076f22a02c98be5 Mon Sep 17 00:00:00 2001 From: rdsharma26 <65777064+rdsharma26@users.noreply.github.com> Date: Mon, 1 Jul 2024 20:48:15 -0400 Subject: [PATCH] Adding support for Spark 3.5 (#210) * Adding support for Spark 3.5 Skipping Spark 3.4 for now, due to dependency issue with Breeze. Will revisit in future PR. * Added spark 3.5 to workflow --- .github/workflows/base.yml | 2 +- Dockerfile | 4 ++-- pydeequ/configs.py | 1 + pyproject.toml | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 67c6268..d5e4ec7 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - PYSPARK_VERSION: ["3.1.3", "3.2", "3.3"] + PYSPARK_VERSION: ["3.1.3", "3.2", "3.3", "3.5"] steps: - uses: actions/checkout@v3 diff --git a/Dockerfile b/Dockerfile index bdd9099..da30d1f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,8 +22,8 @@ COPY poetry.lock /python-deequ WORKDIR python-deequ RUN poetry install -vvv -RUN poetry add pyspark==3.3 -vvv +RUN poetry add pyspark==3.5.0 -vvv -ENV SPARK_VERSION=3.3 +ENV SPARK_VERSION=3.5 COPY . /python-deequ CMD poetry run python -m pytest -s tests diff --git a/pydeequ/configs.py b/pydeequ/configs.py index ea4d0e8..d4d4b31 100644 --- a/pydeequ/configs.py +++ b/pydeequ/configs.py @@ -5,6 +5,7 @@ SPARK_TO_DEEQU_COORD_MAPPING = { + "3.5": "com.amazon.deequ:deequ:2.0.7-spark-3.5", "3.3": "com.amazon.deequ:deequ:2.0.7-spark-3.3", "3.2": "com.amazon.deequ:deequ:2.0.7-spark-3.2", "3.1": "com.amazon.deequ:deequ:2.0.7-spark-3.1" diff --git a/pyproject.toml b/pyproject.toml index c9fa38e..e4728dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ [tool.poetry.dependencies] -python = ">=3.7,<4" +python = ">=3.8,<4" numpy = ">=1.14.1" pandas = ">=0.23.0" pyspark = { version = ">=2.4.7, <3.3.0", optional = true }