diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index ebe70527..927ef362 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -1,53 +1,11 @@
-name: Build and run tests
+name: cml_runtimes
 
-# Controls when the action will run.
 on:
-  # Triggers the workflow on push events for the main branch
-  push:
-    branches: [ main ]
-  # Triggers the workflow on pull requests to main branch
   pull_request:
     branches: [ main ]
 
-  # Allows you to run this workflow manually from the Actions tab
-  workflow_dispatch:
-
 jobs:
 
-  build:
-    runs-on: ubuntu-20.04
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up Python 3.6.8
-        uses: actions/setup-python@v3
-        with:
-          python-version: 3.6.8
-
-      - name: Check package build
-        run: |
-          python -m pip install --upgrade pip
-
-  test:
-    runs-on: ubuntu-20.04
-    steps:
-      # Checks-out your repository under $GITHUB_WORKSPACE
-      - uses: actions/checkout@v3
-
-      - uses: actions/setup-python@v3
-        with:
-          python-version: 3.6.8
-          cache: 'pip'
-
-      - name: Install Python dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install .[dev]
-
-      - name: Run pytest
-        run: |
-          pytest -v
-
   commit-hooks:
     runs-on: ubuntu-20.04
     steps:
@@ -55,7 +13,7 @@ jobs:
 
       - uses: actions/setup-python@v3
         with:
-          python-version: 3.6.8
+          python-version: 3.10.13
           cache: 'pip'
 
       - name: Install Python dependencies
@@ -66,3 +24,30 @@ jobs:
       - name: Check commit hooks
         run: |
           pre-commit run --all-files
+
+  # Build one CML runtime image per matrix entry and run the test suite inside it.
+  testing-cml:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        cml_version: ["3.8", "3.9", "3.10", "3.11"]
+    steps:
+      # Runtime Dockerfiles come from https://github.com/cloudera/ml-runtimes
+      - name: checkout ml-runtimes
+        uses: actions/checkout@v3
+        with:
+          repository: cloudera/ml-runtimes
+      - name: build runtime cml_${{matrix.cml_version}}
+        run: docker build -t cml:${{matrix.cml_version}} -f 'pbj-workbench-python${{matrix.cml_version}}-standard.Dockerfile' .
+      # Check out this repository so the next step can mount it into the container.
+      - name: checkout to repository
+        uses: actions/checkout@v3
+      # NOTE(review): the exec steps below assume /home/cdsw is the image's working directory - confirm.
+      - name: create container
+        run: docker run -id --name container_${{matrix.cml_version}} -v "$(pwd)":/home/cdsw cml:${{matrix.cml_version}}
+      - name: build in dev mode
+        run: docker exec container_${{matrix.cml_version}} pip install ."[dev]"
+      - name: check env
+        run: docker exec container_${{matrix.cml_version}} pip list
+      - name: test
+        run: docker exec container_${{matrix.cml_version}} pytest
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7fb5d668..d808b125 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
         entry: pre-commits/check_added_large_files.py
         name: Check for files larger than 5 MB
         language: script
-        stages: [commit]
+        stages: [pre-commit]
         args: [ "--maxkb=5120" ]
 
   #works
@@ -19,7 +19,7 @@ repos:
         entry: pre-commits/end_of_line_fixer.py
         name: Check for a blank line at the end of scripts (auto-fixes)
         language: script
-        stages: [commit]
+        stages: [pre-commit]
 
   #works
   - repo: local
@@ -28,7 +28,7 @@ repos:
         entry: pre-commits/remove_whitespace.py
         name: Check for trailing whitespaces (auto-fixes)
         language: script
-        stages: [commit]
+        stages: [pre-commit]
 
   #works
   - repo: local
@@ -37,7 +37,7 @@ repos:
         entry: pre-commits/mixed_line_endings.py
         name: Check for consistent end of line type LF to CRLF to CR (auto-fixes)
         language: script
-        stages: [commit]
+        stages: [pre-commit]
 
   #works
   #if using on different file types, it will need a seperate hook per file type
@@ -48,7 +48,7 @@ repos:
         name: isort - Sort Python imports (auto-fixes)
         language: system
         types: [python]
-        stages: [commit]
+        stages: [pre-commit]
         args: [ "--profile", "black", "--filter-files" ]
 
   #works
@@ -58,7 +58,7 @@ repos:
         entry: nbstripout
         name: nbstripout - Strip outputs from notebooks (auto-fixes)
         language: system
-        stages: [commit]
+        stages: [pre-commit]
         # args:
         #   - --extra-keys
         #   - "metadata.colab metadata.kernelspec cell.metadata.colab cell.metadata.executionInfo cell.metadata.id cell.metadata.outputId"
@@ -71,7 +71,7 @@ repos:
         name: black - consistent Python code formatting (auto-fixes)
         language: system
         types: [python]
-        stages: [commit]
+        stages: [pre-commit]
         args: ["--verbose"]
         exclude: ^playground/
 
@@ -83,7 +83,7 @@ repos:
         name: flake8 - Python linting
         language: system
         types: [python]
-        stages: [commit]
+        stages: [pre-commit]
 
 
   # works in testing
@@ -96,7 +96,7 @@ repos:
         #args: [scan, audit]
         language: system
         types: [python]
-        stages: [commit]
+        stages: [pre-commit]
 
 
 
diff --git a/mbs_results/staging/data_cleaning.py b/mbs_results/staging/data_cleaning.py
index f8563391..119f6fe9 100644
--- a/mbs_results/staging/data_cleaning.py
+++ b/mbs_results/staging/data_cleaning.py
@@ -66,8 +66,10 @@ def clean_and_merge(
     responses = pd.DataFrame(snapshot["responses"])
     responses = filter_responses(responses, reference, period, "lastupdateddate")
 
-    responses = responses[responses_keep_cols].set_index([reference, period])
-    contributors = contributors[contributors_keep_cols].set_index([reference, period])
+    responses = responses[list(responses_keep_cols)].set_index([reference, period])
+    contributors = contributors[list(contributors_keep_cols)].set_index(
+        [reference, period]
+    )
     validate_indices(responses, contributors)
     return responses.merge(contributors, on=[reference, period])
 
@@ -439,8 +441,8 @@ def correct_values(
 
     # Update value only if columns exist
     if set(check_columns).issubset(df.columns):
-        df_temp.loc[
-            df[condition_column].isin(condition_values), columns_to_correct
-        ] = replace_with
+        df_temp.loc[df[condition_column].isin(condition_values), columns_to_correct] = (
+            replace_with
+        )
 
     return df_temp
diff --git a/mbs_results/utilities/constrains.py b/mbs_results/utilities/constrains.py
index a8890a32..e63c9ba5 100644
--- a/mbs_results/utilities/constrains.py
+++ b/mbs_results/utilities/constrains.py
@@ -375,9 +375,9 @@ def calculate_derived_outlier_weights(
     )
 
     updated_o_weight_bool = df_pre_winsorised[winsorised_target].isna()
-    df_pre_winsorised.loc[
-        updated_o_weight_bool, winsorised_target
-    ] = post_win_derived.loc[updated_o_weight_bool, winsorised_target]
+    df_pre_winsorised.loc[updated_o_weight_bool, winsorised_target] = (
+        post_win_derived.loc[updated_o_weight_bool, winsorised_target]
+    )
     df_pre_winsorised["post_wins_marker"] = updated_o_weight_bool
 
     df_pre_winsorised.reset_index(inplace=True)
diff --git a/pre-commits/check_added_large_files.py b/pre-commits/check_added_large_files.py
index 59c0353a..973697f7 100755
--- a/pre-commits/check_added_large_files.py
+++ b/pre-commits/check_added_large_files.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 """Pre commit hook to ensure large files aren't added to repo."""
 import argparse
 import json
diff --git a/pre-commits/check_merge_conflict.py b/pre-commits/check_merge_conflict.py
index e6c67007..997996c5 100755
--- a/pre-commits/check_merge_conflict.py
+++ b/pre-commits/check_merge_conflict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 """Pre commit hook to check for merge conflict flags in file."""
 import argparse
 import os.path
diff --git a/pre-commits/end_of_line_fixer.py b/pre-commits/end_of_line_fixer.py
index eb85f62e..6b71763a 100755
--- a/pre-commits/end_of_line_fixer.py
+++ b/pre-commits/end_of_line_fixer.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 """Pre commit hook to ensure single blank line at end of python file."""
 import argparse
 import os
diff --git a/pre-commits/mixed_line_endings.py b/pre-commits/mixed_line_endings.py
index 8ae44909..54edea53 100755
--- a/pre-commits/mixed_line_endings.py
+++ b/pre-commits/mixed_line_endings.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 """Pre commit hook to ensure all EOL characters are the same."""
 import argparse
 import collections
diff --git a/pre-commits/remove_whitespace.py b/pre-commits/remove_whitespace.py
index 61e5803f..69b0135c 100755
--- a/pre-commits/remove_whitespace.py
+++ b/pre-commits/remove_whitespace.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 """Pre commit hook to remove any trailing whitespace."""
 import argparse
 import os
diff --git a/tests/utilities/test_constrains.py b/tests/utilities/test_constrains.py
index c67ea981..3d0460b9 100644
--- a/tests/utilities/test_constrains.py
+++ b/tests/utilities/test_constrains.py
@@ -32,6 +32,10 @@ def test_replace_values_index_base(filepath):
     replace_values_index_based(df_in, "target", 49, ">", 40)
     replace_values_index_based(df_in, "target", 90, ">=", 40)
 
+    # Enforce dtypes, otherwise null==null fails
+    df_in["constrain_marker"] = df_in["constrain_marker"].astype(str)
+    df_expected["constrain_marker"] = df_expected["constrain_marker"].astype(str)
+
     assert_frame_equal(df_in, df_expected)
 
 