# fix(moe): fix moe zero mode bug (#548) #7
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Unit-test workflow.
#
# Runs each pytest suite as its own job on runners labelled `t_cluster`,
# submitting the test command through `srun` to the partition named in
# SLURM_PARTITION. Triggered by pushes and pull requests that target the
# `develop` or `main` branches.
name: unit-tests

on:
  push:
    branches:
      - "develop"
      - "main"
    # Do not trigger when every changed file is under cmds/ or is Markdown.
    paths-ignore:
      - "cmds/**"
      - "**.md"
  pull_request:
    branches:
      - "develop"
      - "main"
    # Do not trigger when every changed file is under cmds/ or is Markdown.
    paths-ignore:
      - "cmds/**"
      - "**.md"

env:
  # This value is stored as literal text; it is NOT evaluated here. The
  # "mask env" steps splice it into their script via
  # ${{env.WORKSPACE_PREFIX}}, and only there does the shell run the command
  # substitution, yielding the first four '/'-separated fields of
  # $GITHUB_WORKSPACE.
  WORKSPACE_PREFIX: $(echo $GITHUB_WORKSPACE |cut -d '/' -f 1-4)
  # Partition passed to `srun -p` by every test job.
  SLURM_PARTITION: llm_s

jobs:
  # Installs requirement files that were touched by the most recent commit.
  check-requirements:
    runs-on: [t_cluster]
    steps:
      - name: mask env
        run: |
          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
      - uses: actions/checkout@v3
        with:
          # Two commits are needed so that HEAD^1 exists for the diff below.
          fetch-depth: 2
      - name: check-requirements
        run: |
          changed_files=$(git diff --name-only -r HEAD^1 HEAD)
          echo $changed_files
          # The quoted right-hand side makes =~ a literal substring match
          # against the list of changed paths.
          if [[ $changed_files =~ "runtime.txt" ]]; then
            pip install -r requirements/runtime.txt
          fi
          if [[ $changed_files =~ "torch.txt" ]]; then
            pip install -r requirements/torch.txt
          fi

  # Each test job below waits for check-requirements. `!cancelled()` replaces
  # the default success() condition, so a job still runs when
  # check-requirements failed and is skipped only if the run was cancelled.

  # tests/test_core/test_pipeline.py on 8 GPUs.
  unit_tests_core_pipeline:
    if: ${{ !cancelled() }}
    needs: check-requirements
    runs-on: [t_cluster]
    timeout-minutes: 20
    steps:
      - name: mask env
        run: |
          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
      - uses: actions/checkout@v3
      - name: core_pipeline
        run: |
          source /mnt/petrelfs/share_data/llm_env/env/llm-flash2.0
          export PYTHONPATH=$PWD:$PYTHONPATH
          srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 \
            --job-name=internlm-ut-${GITHUB_RUN_ID}-${GITHUB_JOB} \
            --quotatype=spot -N 1 -n 1 --gres=gpu:8 \
            python -m pytest -s -v ./tests/test_core/test_pipeline.py

  # tests/test_utils/test_storage_manager.py on 8 GPUs.
  unit_tests_utils_storage_manager:
    if: ${{ !cancelled() }}
    needs: check-requirements
    runs-on: [t_cluster]
    timeout-minutes: 20
    steps:
      - name: mask env
        run: |
          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
      - uses: actions/checkout@v3
      - name: utils_storage_manager
        run: |
          source /mnt/petrelfs/share_data/llm_env/env/llm-flash2.0
          export PYTHONPATH=$PWD:$PYTHONPATH
          srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 \
            --job-name=internlm-ut-${GITHUB_RUN_ID}-${GITHUB_JOB} \
            --quotatype=spot -N 1 -n 1 --gres=gpu:8 \
            python -m pytest -s -v ./tests/test_utils/test_storage_manager.py

  # tests/test_model/test_fused_precision/test_fused_precision.py on 8 GPUs.
  unit_tests_model_fused_precision:
    if: ${{ !cancelled() }}
    needs: check-requirements
    runs-on: [t_cluster]
    timeout-minutes: 5
    steps:
      - name: mask env
        run: |
          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
      - uses: actions/checkout@v3
      - name: model_fused_precision
        run: |
          source /mnt/petrelfs/share_data/llm_env/env/llm-flash2.0
          export PYTHONPATH=$PWD:$PYTHONPATH
          srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 \
            --job-name=internlm-ut-${GITHUB_RUN_ID}-${GITHUB_JOB} \
            --quotatype=spot -N 1 -n 1 --gres=gpu:8 \
            python -m pytest -s -v ./tests/test_model/test_fused_precision/test_fused_precision.py

  # tests/test_data/test_batch_sampler.py on 8 GPUs.
  unit_tests_data_batch_sampler:
    if: ${{ !cancelled() }}
    needs: check-requirements
    runs-on: [t_cluster]
    timeout-minutes: 10
    steps:
      - name: mask env
        run: |
          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
      - uses: actions/checkout@v3
      # Step name aligned with the job id and the test file it runs.
      - name: data_batch_sampler
        run: |
          source /mnt/petrelfs/share_data/llm_env/env/llm-flash2.0
          export PYTHONPATH=$PWD:$PYTHONPATH
          srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 \
            --job-name=internlm-ut-${GITHUB_RUN_ID}-${GITHUB_JOB} \
            --quotatype=spot -N 1 -n 1 --gres=gpu:8 \
            python -m pytest -s -v ./tests/test_data/test_batch_sampler.py

  # tests/test_utils/test_timeout.py on 1 GPU.
  unit_tests_utils_timeout:
    if: ${{ !cancelled() }}
    needs: check-requirements
    runs-on: [t_cluster]
    timeout-minutes: 5
    steps:
      - name: mask env
        run: |
          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
      - uses: actions/checkout@v3
      - name: utils_timeout
        run: |
          source /mnt/petrelfs/share_data/llm_env/env/llm-flash2.0
          export PYTHONPATH=$PWD:$PYTHONPATH
          srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 \
            --job-name=internlm-ut-${GITHUB_RUN_ID}-${GITHUB_JOB} \
            --quotatype=spot -N 1 -n 1 --gres=gpu:1 \
            python -m pytest -s -v ./tests/test_utils/test_timeout.py

  # tests/test_utils/test_model_checkpoint.py on 2 GPUs.
  unit_tests_utils_model_checkpoint:
    if: ${{ !cancelled() }}
    needs: check-requirements
    runs-on: [t_cluster]
    timeout-minutes: 5
    steps:
      - name: mask env
        run: |
          echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
      - uses: actions/checkout@v3
      - name: utils_model_checkpoint
        run: |
          source /mnt/petrelfs/share_data/llm_env/env/llm-flash2.0
          export PYTHONPATH=$PWD:$PYTHONPATH
          srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 \
            --job-name=internlm-ut-${GITHUB_RUN_ID}-${GITHUB_JOB} \
            --quotatype=spot -N 1 -n 1 --gres=gpu:2 \
            python -m pytest -s -v ./tests/test_utils/test_model_checkpoint.py