Skip to content

Commit

Permalink
Merge branch 'master' of github.com:skypilot-org/skypilot into fast-j…
Browse files Browse the repository at this point in the history
…ob-submission
  • Loading branch information
Michaelvll committed Dec 6, 2024
2 parents e6e7f28 + 6e50832 commit 2cda3e2
Show file tree
Hide file tree
Showing 133 changed files with 5,696 additions and 2,493 deletions.
20 changes: 13 additions & 7 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,35 @@ jobs:
python-version: ["3.8"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install yapf==0.32.0
pip install toml==0.10.2
pip install black==22.10.0
pip install isort==5.12.0
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install yapf==0.32.0
uv pip install toml==0.10.2
uv pip install black==22.10.0
uv pip install isort==5.12.0
- name: Running yapf
run: |
source ~/test-env/bin/activate
yapf --diff --recursive ./ --exclude 'sky/skylet/ray_patches/**' \
--exclude 'sky/skylet/providers/ibm/**'
- name: Running black
run: |
source ~/test-env/bin/activate
black --diff --check sky/skylet/providers/ibm/
- name: Running isort for black formatted files
run: |
source ~/test-env/bin/activate
isort --diff --check --profile black -l 88 -m 3 \
sky/skylet/providers/ibm/
- name: Running isort for yapf formatted files
run: |
source ~/test-env/bin/activate
isort --diff --check ./ --sg 'sky/skylet/ray_patches/**' \
--sg 'sky/skylet/providers/ibm/**'
22 changes: 0 additions & 22 deletions .github/workflows/mypy-generic.yml

This file was deleted.

15 changes: 10 additions & 5 deletions .github/workflows/mypy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ on:
branches:
- master
- 'releases/**'
merge_group:

jobs:
mypy:
runs-on: ubuntu-latest
Expand All @@ -19,15 +21,18 @@ jobs:
python-version: ["3.8"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install mypy==$(grep mypy requirements-dev.txt | cut -d'=' -f3)
pip install $(grep types- requirements-dev.txt | tr '\n' ' ')
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install mypy==$(grep mypy requirements-dev.txt | cut -d'=' -f3)
uv pip install $(grep types- requirements-dev.txt | tr '\n' ' ')
- name: Running mypy
run: |
source ~/test-env/bin/activate
mypy $(cat tests/mypy_files.txt)
16 changes: 10 additions & 6 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,20 @@ jobs:
python-version: ["3.8"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ".[all]"
pip install pylint==2.14.5
pip install pylint-quotes==0.2.3
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install --prerelease=allow "azure-cli>=2.65.0"
uv pip install ".[all]"
uv pip install pylint==2.14.5
uv pip install pylint-quotes==0.2.3
- name: Analysing the code with pylint
run: |
source ~/test-env/bin/activate
pylint --load-plugins pylint_quotes sky
31 changes: 13 additions & 18 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,21 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Install Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}

- name: Cache dependencies
uses: actions/cache@v3
if: startsWith(runner.os, 'Linux')
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-pytest-${{ matrix.python-version }}
restore-keys: |
${{ runner.os }}-pip-pytest-${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[all]"
pip install pytest pytest-xdist pytest-env>=0.6 memory-profiler==0.61.0
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install --prerelease=allow "azure-cli>=2.65.0"
# Use -e to include examples and tests folder in the path for unit
# tests to access them.
uv pip install -e ".[all]"
uv pip install pytest pytest-xdist pytest-env>=0.6 memory-profiler==0.61.0
- name: Run tests with pytest
run: SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 pytest -n 0 --dist no ${{ matrix.test-path }}
run: |
source ~/test-env/bin/activate
SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 pytest -n 0 --dist no ${{ matrix.test-path }}
16 changes: 10 additions & 6 deletions .github/workflows/test-doc-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,28 @@ on:
merge_group:

jobs:
format:
doc-build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install --prerelease=allow "azure-cli>=2.65.0"
uv pip install ".[all]"
cd docs
pip install -r ./requirements-docs.txt
uv pip install -r ./requirements-docs.txt
- name: Build documentation
run: |
source ~/test-env/bin/activate
cd ./docs
./build.sh
74 changes: 74 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Ensure this configuration aligns with format.sh and requirements.txt
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files

- repo: https://github.com/psf/black
rev: 22.10.0 # Match the version from requirements
hooks:
- id: black
name: black (IBM specific)
files: "^sky/skylet/providers/ibm/.*" # Match only files in the IBM directory

- repo: https://github.com/pycqa/isort
rev: 5.12.0 # Match the version from requirements
hooks:
# First isort command
- id: isort
name: isort (general)
args:
- "--sg=build/**" # Matches "${ISORT_YAPF_EXCLUDES[@]}"
- "--sg=sky/skylet/providers/ibm/**"
files: "^(sky|tests|examples|llm|docs)/.*" # Only match these directories
# Second isort command
- id: isort
name: isort (IBM specific)
args:
- "--profile=black"
- "-l=88"
- "-m=3"
files: "^sky/skylet/providers/ibm/.*" # Only match IBM-specific directory

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.991 # Match the version from requirements
hooks:
- id: mypy
args:
# From tests/mypy_files.txt
- "sky"
- "--exclude"
- "sky/benchmark|sky/callbacks|sky/skylet/providers/azure|sky/resources.py|sky/backends/monkey_patches"
pass_filenames: false
additional_dependencies:
- types-PyYAML
- types-requests<2.31 # Match the condition in requirements.txt
- types-setuptools
- types-cachetools
- types-pyvmomi

- repo: https://github.com/google/yapf
rev: v0.32.0 # Match the version from requirements
hooks:
- id: yapf
name: yapf
exclude: (build/.*|sky/skylet/providers/ibm/.*) # Matches exclusions from the script
args: ['--recursive', '--parallel'] # Only necessary flags
additional_dependencies: [toml==0.10.2]

- repo: https://github.com/pylint-dev/pylint
rev: v2.14.5 # Match the version from requirements
hooks:
- id: pylint
additional_dependencies:
- pylint-quotes==0.2.3 # Match the version from requirements
name: pylint
args:
- --rcfile=.pylintrc # Use your custom pylint configuration
- --load-plugins=pylint_quotes # Load the pylint-quotes plugin
files: ^sky/ # Only include files from the 'sky/' directory
exclude: ^sky/skylet/providers/ibm/
15 changes: 8 additions & 7 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Contributing to SkyPilot

Thank you for your interest in contributing to SkyPilot! We welcome and value
all contributions to the project, including but not limited to:
Thank you for your interest in contributing to SkyPilot! We welcome and value
all contributions to the project, including but not limited to:

* [Bug reports](https://github.com/skypilot-org/skypilot/issues) and [discussions](https://github.com/skypilot-org/skypilot/discussions)
* [Pull requests](https://github.com/skypilot-org/skypilot/pulls) for bug fixes and new features
Expand All @@ -26,7 +26,7 @@ pip install -r requirements-dev.txt
### Testing
To run smoke tests (NOTE: Running all smoke tests launches ~20 clusters):
```
# Run all tests except for AWS and Lambda Cloud
# Run all tests on AWS and Azure (default smoke test clouds)
pytest tests/test_smoke.py
# Terminate a test's cluster even if the test failed (default is to keep it around for debugging)
Expand All @@ -41,11 +41,11 @@ pytest tests/test_smoke.py::test_minimal
# Only run managed spot tests
pytest tests/test_smoke.py --managed-spot
# Only run test for AWS + generic tests
pytest tests/test_smoke.py --aws
# Only run test for GCP + generic tests
pytest tests/test_smoke.py --gcp
# Change cloud for generic tests to aws
pytest tests/test_smoke.py --generic-cloud aws
# Change cloud for generic tests to Azure
pytest tests/test_smoke.py --generic-cloud azure
```

For profiling code, use:
Expand Down Expand Up @@ -78,6 +78,7 @@ It has some convenience features which you might find helpful (see [Dockerfile](
- If relevant, add tests for your changes. For changes that touch the core system, run the [smoke tests](#testing) and ensure they pass.
- Follow the [Google style guide](https://google.github.io/styleguide/pyguide.html).
- Ensure code is properly formatted by running [`format.sh`](https://github.com/skypilot-org/skypilot/blob/master/format.sh).
- [Optional] You can also install pre-commit hooks by running `pre-commit install` to automatically format your code on commit.
- Push your changes to your fork and open a pull request in the SkyPilot repository.
- In the PR description, write a `Tested:` section to describe relevant tests performed.

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_k8s
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ARG DEBIAN_FRONTEND=noninteractive

# Initialize conda for root user, install ssh and other local dependencies
RUN apt update -y && \
apt install git gcc rsync sudo patch openssh-server pciutils nano fuse socat netcat curl -y && \
apt install git gcc rsync sudo patch openssh-server pciutils nano fuse socat netcat-openbsd curl -y && \
rm -rf /var/lib/apt/lists/* && \
apt remove -y python3 && \
conda init
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile_k8s_gpu
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ARG DEBIAN_FRONTEND=noninteractive
# We remove cuda lists to avoid conflicts with the cuda version installed by ray
RUN rm -rf /etc/apt/sources.list.d/cuda* && \
apt update -y && \
apt install git gcc rsync sudo patch openssh-server pciutils nano fuse unzip socat netcat curl -y && \
apt install git gcc rsync sudo patch openssh-server pciutils nano fuse unzip socat netcat-openbsd curl -y && \
rm -rf /var/lib/apt/lists/*

# Setup SSH and generate hostkeys
Expand Down Expand Up @@ -36,6 +36,7 @@ SHELL ["/bin/bash", "-c"]

# Install conda and other dependencies
# Keep the conda and Ray versions below in sync with the ones in skylet.constants
# Keep this section in sync with the custom image optimization recommendations in our docs (kubernetes-getting-started.rst)
RUN curl https://repo.anaconda.com/miniconda/Miniconda3-py310_23.11.0-2-Linux-x86_64.sh -o Miniconda3-Linux-x86_64.sh && \
bash Miniconda3-Linux-x86_64.sh -b && \
eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && conda config --set auto_activate_base true && conda activate base && \
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,11 @@ SkyPilot then performs the heavy-lifting for you, including:
Refer to [Quickstart](https://skypilot.readthedocs.io/en/latest/getting-started/quickstart.html) to get started with SkyPilot.

## More Information
To learn more, see our [documentation](https://skypilot.readthedocs.io/en/latest/), [blog](https://blog.skypilot.co/), and [community integrations](https://blog.skypilot.co/community/).
To learn more, see [Concept: Sky Computing](https://docs.skypilot.co/en/latest/sky-computing.html), [SkyPilot docs](https://skypilot.readthedocs.io/en/latest/), and [SkyPilot blog](https://blog.skypilot.co/).

<!-- Keep this section in sync with index.rst in SkyPilot Docs -->
Runnable examples:
- [**AI Gallery**](https://docs.skypilot.co/en/latest/gallery/index.html)
- LLMs on SkyPilot
- [Llama 3.2: lightweight and vision models](./llm/llama-3_2/)
- [Pixtral](./llm/pixtral/)
Expand All @@ -183,7 +184,7 @@ Runnable examples:
- [LocalGPT](./llm/localgpt)
- [Falcon](./llm/falcon)
- Add yours here & see more in [`llm/`](./llm)!
- Framework examples: [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/nemo.yaml), [programmatic grid search](https://github.com/skypilot-org/skypilot/blob/master/examples/huggingface_glue_imdb_grid_search_app.py), [Docker](https://github.com/skypilot-org/skypilot/blob/master/examples/docker/echo_app.yaml), [Cog](https://github.com/skypilot-org/skypilot/blob/master/examples/cog/), [Unsloth](https://github.com/skypilot-org/skypilot/blob/master/examples/unsloth/unsloth.yaml), [Ollama](https://github.com/skypilot-org/skypilot/blob/master/llm/ollama), [llm.c](https://github.com/skypilot-org/skypilot/tree/master/llm/gpt-2), [Airflow](./examples/airflow/training_workflow) and [many more (`examples/`)](./examples).
- Framework examples: [PyTorch DDP](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_torch.yaml), [DeepSpeed](./examples/deepspeed-multinode/sky.yaml), [JAX/Flax on TPU](https://github.com/skypilot-org/skypilot/blob/master/examples/tpu/tpuvm_mnist.yaml), [Stable Diffusion](https://github.com/skypilot-org/skypilot/tree/master/examples/stable_diffusion), [Detectron2](https://github.com/skypilot-org/skypilot/blob/master/examples/detectron2_docker.yaml), [Distributed](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_distributed_tf_app.py) [TensorFlow](https://github.com/skypilot-org/skypilot/blob/master/examples/resnet_app_storage.yaml), [Ray Train](examples/distributed_ray_train/ray_train.yaml), [NeMo](https://github.com/skypilot-org/skypilot/blob/master/examples/nemo/), [programmatic grid search](https://github.com/skypilot-org/skypilot/blob/master/examples/huggingface_glue_imdb_grid_search_app.py), [Docker](https://github.com/skypilot-org/skypilot/blob/master/examples/docker/echo_app.yaml), [Cog](https://github.com/skypilot-org/skypilot/blob/master/examples/cog/), [Unsloth](https://github.com/skypilot-org/skypilot/blob/master/examples/unsloth/unsloth.yaml), [Ollama](https://github.com/skypilot-org/skypilot/blob/master/llm/ollama), [llm.c](https://github.com/skypilot-org/skypilot/tree/master/llm/gpt-2), [Airflow](./examples/airflow/training_workflow) and [many more (`examples/`)](./examples).

Case Studies and Integrations: [Community Spotlights](https://blog.skypilot.co/community/)

Expand Down
4 changes: 2 additions & 2 deletions docs/source/_static/custom.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ document.addEventListener('DOMContentLoaded', function () {
script.setAttribute('data-project-logo', 'https://avatars.githubusercontent.com/u/109387420?s=100&v=4');
script.setAttribute('data-modal-disclaimer', 'Results are automatically generated and may be inaccurate or contain inappropriate information. Do not include any sensitive information in your query.\n**To get further assistance, you can chat directly with the development team** by joining the [SkyPilot Slack](https://slack.skypilot.co/).');
script.setAttribute('data-modal-title', 'SkyPilot Docs AI - Ask a Question.');
script.setAttribute('data-button-position-bottom', '85px');
script.setAttribute('data-button-position-bottom', '100px');
script.async = true;
document.head.appendChild(script);
});
Expand All @@ -25,14 +25,14 @@ document.addEventListener('DOMContentLoaded', function () {
document.addEventListener('DOMContentLoaded', () => {
// New items:
const newItems = [
{ selector: '.caption-text', text: 'SkyServe: Model Serving' },
{ selector: '.toctree-l1 > a', text: 'Managed Jobs' },
{ selector: '.toctree-l1 > a', text: 'Pixtral (Mistral AI)' },
{ selector: '.toctree-l1 > a', text: 'Many Parallel Jobs' },
{ selector: '.toctree-l1 > a', text: 'Reserved, Capacity Blocks, DWS' },
{ selector: '.toctree-l1 > a', text: 'Llama 3.2 (Meta)' },
{ selector: '.toctree-l1 > a', text: 'Admin Policy Enforcement' },
{ selector: '.toctree-l1 > a', text: 'Using Existing Machines' },
{ selector: '.toctree-l1 > a', text: 'Concept: Sky Computing' },
];
newItems.forEach(({ selector, text }) => {
document.querySelectorAll(selector).forEach((el) => {
Expand Down
Loading

0 comments on commit 2cda3e2

Please sign in to comment.