Skip to content

Commit

Permalink
Merge pull request #5 from kbase/dev_spark_util
Browse files Browse the repository at this point in the history
add tests
  • Loading branch information
Tianhao-Gu authored May 20, 2024
2 parents 9cbdf6c + 70c820b commit 88c6c52
Show file tree
Hide file tree
Showing 7 changed files with 1,658 additions and 2 deletions.
7 changes: 7 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ updates:
time: '11:00'
open-pull-requests-limit: 25

# Python dependencies (pip ecosystem)
- package-ecosystem: 'pip'  # see the Dependabot documentation for possible values
  directory: '/'  # location of the package manifests
  schedule:
    interval: 'monthly'
  open-pull-requests-limit: 25

# GitHub Actions
- package-ecosystem: "github-actions"
directory: ".github/workflows"
Expand Down
50 changes: 50 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Runs the pytest suite with coverage and uploads the report to Codecov.
name: CDM Jupyterhub tests

on:
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
      - ready_for_review
  push:
    # run workflow on pushes (i.e. merges) to main, master or develop
    branches:
      - main
      - master
      - develop

jobs:

  cdm_jupyterhub_tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # keep versions quoted so they stay strings (3.10 unquoted would parse as 3.1)
        python-version: ["3.11"]

    steps:

      - name: Repo checkout
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        # tried VaultVulp/action-pipenv but pytest wasn't on the path post action
        shell: bash
        run: |
          pip install pipenv
          pipenv sync --system --dev

      - name: Run tests
        shell: bash
        # --cov-report=xml writes the coverage report that the next step uploads
        run: PYTHONPATH=. pytest --cov=src --cov-report=xml test

      - name: Upload coverage to Codecov
        # v3 runs on the deprecated Node 16 runtime; v4 requires the upload token
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          fail_ci_if_error: true
8 changes: 6 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@ RUN apt-get update && apt-get install -y \
gcc \
&& rm -rf /var/lib/apt/lists/*

# Install Jupyterlab and other python dependencies
RUN pip3 install jupyterlab==4.2.0 pyspark==3.5.1
# Install pipenv; --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir pipenv

# Install the Python dependencies from the copied Pipfile/Pipfile.lock into the
# system interpreter (--system: no virtualenv is created inside the image).
COPY Pipfile* ./
RUN pipenv sync --system

# Copy the project sources and append them to the Python import path.
COPY ./src/ /src
# key=value form: the space-separated "ENV key value" syntax is discouraged.
ENV PYTHONPATH="${PYTHONPATH}:/src"
Expand Down
18 changes: 18 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Package index used to resolve the dependencies below.
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

# Default dependencies (installed by a plain `pipenv sync`).
[packages]
jupyterlab = "==4.2.0"
pyspark = "==3.5.1"
boto3 = "==1.34.109"

# Development and test tooling (installed only with `pipenv sync --dev`).
[dev-packages]
pytest = "==8.2.0"
coverage = "==7.5.1"
pytest-cov = "==5.0.0"
ipython = "==8.24.0"

# Python version required by this project.
[requires]
python_version = "3.11"
1,497 changes: 1,497 additions & 0 deletions Pipfile.lock

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ print(pi)
sc.stop()
```

## Development

### Running tests

Python 3.11 and pipenv must be installed on the system.

```
pipenv sync --dev # only the first time or when Pipfile.lock changes
pipenv shell
PYTHONPATH=. pytest test
```

## Rancher Deployment

### Environment Variables
Expand Down
68 changes: 68 additions & 0 deletions test/src/spark/utils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import socket
from unittest.mock import patch

import pytest
from pyspark.sql import SparkSession

from src.spark.utils import get_spark_session


@pytest.fixture(scope="session")
def mock_spark_master():
    """Create a mock Spark master on an available port.

    Opens a TCP socket bound to localhost on an OS-assigned port and puts it
    in listening mode. The socket only listens; this fixture never accepts a
    connection on it.

    Yields:
        int: The port number the listening socket is bound to.
    """
    # Using the socket as a context manager guarantees it is closed even when
    # bind() or listen() raises before the fixture reaches its yield.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
        server_socket.bind(('localhost', 0))  # Port 0: bind to an available port
        port = server_socket.getsockname()[1]
        server_socket.listen(1)

        print(f"Mock Spark master running on port: {port}")

        yield port

    print("Mock Spark master closed.")


@pytest.fixture
def spark_session_local():
    """Yield a session built with ``local=True`` and stop it on teardown.

    ``os.environ`` is patched with an empty mapping while the fixture is
    active: no variable is added, and the environment is restored to its
    prior state when the patch exits.
    """
    env_patch = patch.dict('os.environ', {})
    with env_patch:
        session = get_spark_session("TestApp", local=True)
        print("Created local Spark session.")
        try:
            yield session
        finally:
            # Always stop the session, even if the test using it failed.
            session.stop()
            print("Stopped local Spark session.")


@pytest.fixture
def spark_session_non_local(mock_spark_master):
    """Yield a session built with ``local=False`` plus the mock master's port.

    ``SPARK_MASTER_URL`` is set to ``spark://localhost:<port>`` in
    ``os.environ`` while the fixture is active, where ``<port>`` is the value
    provided by the ``mock_spark_master`` fixture.

    Yields:
        tuple: ``(spark_session, port)``.
    """
    master_port = mock_spark_master
    master_url = f"spark://localhost:{master_port}"
    print(f"Using Spark master URL: {master_url}")

    env_overrides = {"SPARK_MASTER_URL": master_url}
    with patch.dict('os.environ', env_overrides):
        session = get_spark_session("TestApp", local=False)
        print("Created non-local Spark session.")
        try:
            yield session, master_port
        finally:
            # Always stop the session, even if the test using it failed.
            session.stop()
            print("Stopped non-local Spark session.")


def test_spark_session_local(spark_session_local):
    """Check the local session's type, master setting and application name."""
    session = spark_session_local
    assert isinstance(session, SparkSession)

    runtime_conf = session.conf
    assert runtime_conf.get("spark.master") == "local[*]"
    assert runtime_conf.get("spark.app.name") == "TestApp"


def test_spark_session_non_local(spark_session_non_local):
    """Check the non-local session's type, master URL and application name."""
    session, master_port = spark_session_non_local
    assert isinstance(session, SparkSession)

    # The master URL must point at the port handed out by the fixture.
    expected_master = f"spark://localhost:{master_port}"
    runtime_conf = session.conf
    assert runtime_conf.get("spark.master") == expected_master
    assert runtime_conf.get("spark.app.name") == "TestApp"

0 comments on commit 88c6c52

Please sign in to comment.