diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml
index 1bce4a0..ea5f8f0 100644
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -33,7 +33,7 @@ jobs:
           SPARK_VERSION: ${{matrix.PYSPARK_VERSION}}
         run: |
           pip install --upgrade pip
-          pip install poetry
+          pip install poetry==1.7.1
           poetry install
           poetry add pyspark==$SPARK_VERSION
           poetry run python -m pytest -s tests
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..a7a236a
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,35 @@
+FROM ubuntu:22.04
+
+# Build-time only: stops apt from prompting without leaking into the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+# PySpark version to test against; override with `--build-arg SPARK_VERSION=<version>`.
+ARG SPARK_VERSION=3.3
+
+# update + install share one layer so a cached package index can never go stale.
+RUN apt-get update \
+    && apt-get install -y software-properties-common \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y \
+        openjdk-11-jdk \
+        python3-pip \
+        python3.8 \
+        python3.8-distutils \
+    && rm -rf /var/lib/apt/lists/*
+
+# Point python3 at Python 3.8, then print tool versions as a build-time sanity check.
+RUN ln -sf /usr/bin/python3.8 /usr/bin/python3 \
+    && python3 --version \
+    && pip3 --version \
+    && java -version
+RUN pip install --no-cache-dir poetry==1.7.1
+
+COPY . /python-deequ
+WORKDIR /python-deequ
+
+RUN poetry lock --no-update \
+    && poetry install \
+    && poetry add "pyspark==${SPARK_VERSION}"
+
+ENV SPARK_VERSION=${SPARK_VERSION}
+CMD ["poetry", "run", "python", "-m", "pytest", "-s", "tests"]
diff --git a/README.md b/README.md
index 5befb26..cd7eb35 100644
--- a/README.md
+++ b/README.md
@@ -244,4 +244,14 @@ Take a look at tests in `tests/dataquality` and `tests/jobs`
 
 ```bash
 $ poetry run pytest
-```
\ No newline at end of file
+```
+
+## Running Tests Locally (Docker)
+
+If you have issues installing the dependencies listed above, another way to run the tests and verify your changes is through Docker. There is a Dockerfile that will install the required dependencies and run the tests in a container.
+
+```bash
+docker build . -t spark-3.3-docker-test
+docker run spark-3.3-docker-test
+```
+