Merge pull request #1 from ai2es/schreck
Added predict_ensemble to all model classes
djgagne authored Sep 22, 2023
2 parents 7c29b29 + 2cd411e commit 3027754
Showing 41 changed files with 3,379 additions and 3,108 deletions.
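
The files shown here do not include the predict_ensemble implementation named in the commit title, so the following is a hedged sketch of the kind of interface that title describes: each model class (e.g. CategoricalDNN) gains a method that runs every ensemble member and aggregates the outputs. The function name comes from the commit message; the signature, the numpy-based aggregation, and the assumption that members expose .predict are illustrative, not code from this commit.

import numpy as np


def predict_ensemble(members, x):
    # Run one forward pass per ensemble member (any fitted objects exposing
    # .predict) and stack the outputs: shape (n_members, n_samples, n_classes).
    preds = np.stack([np.asarray(m.predict(x)) for m in members])
    # The member mean serves as the ensemble prediction; the member spread
    # gives a simple per-sample uncertainty estimate.
    return preds.mean(axis=0), preds.std(axis=0)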
41 changes: 41 additions & 0 deletions .github/workflows/python-package-conda.yml
@@ -0,0 +1,41 @@
name: Python Package using Conda

on: [push]

jobs:
  build-linux:
    runs-on: ubuntu-latest
    strategy:
      max-parallel: 5
    defaults:
      run:
        shell: bash -l {0}
    steps:
    - uses: actions/checkout@v2
    - uses: conda-incubator/setup-miniconda@v2
      with:
        miniconda-version: "latest"
        mamba-version: "*"
        channel-priority: true
        environment-file: environment.yml
        auto-activate-base: false
        activate-environment: test
    - shell: bash -l {0}
      run: |
        conda info
        conda list
        conda config --show-sources
        conda config --show
        printenv | sort
    - name: Lint with flake8
      shell: bash -l {0}
      run: |
        mamba install flake8
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=100 --max-line-length=127 --statistics
    - name: Test with pytest
      shell: bash -l {0}
      run: |
        pytest
39 changes: 39 additions & 0 deletions .github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v3
      with:
        python-version: '3.x'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build
    - name: Build package
      run: python -m build
    - name: Publish package
      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
      with:
        user: __token__
        password: ${{ secrets.PYPI_API_TOKEN }}
5 changes: 4 additions & 1 deletion applications/evaluate_ptype.py
@@ -48,7 +48,10 @@ def locate_best_model(filepath, metric="val_ave_acc", direction="max"):
 
 
 def evaluate(conf, reevaluate=False):
-    output_features = conf["ptypes"]
+    input_features = []
+    for features in conf["input_features"]:
+        input_features += conf[features]
+    output_features = conf["output_features"]
     n_splits = conf["ensemble"]["n_splits"]
     save_loc = conf["save_loc"]
     labels = ["rain", "snow", "sleet", "frz-rain"]
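
Both this hunk and the train_classifier_ptype.py change below replace hard-coded feature lists with config-driven ones. A minimal sketch of the config layout the new loop assumes follows; the group names match keys the old code read directly (TEMP_C, T_DEWPOINT_C, UGRD_m/s, VGRD_m/s), but the individual column names are illustrative assumptions, not taken from the repository's config files.

# Hypothetical config fragment: each entry in conf["input_features"] names
# another key in conf whose value is a list of column names.
conf = {
    "input_features": ["TEMP_C", "T_DEWPOINT_C", "UGRD_m/s", "VGRD_m/s"],
    "TEMP_C": ["TEMP_C_0_m", "TEMP_C_250_m", "TEMP_C_500_m"],  # assumed columns
    "T_DEWPOINT_C": ["T_DEWPOINT_C_0_m", "T_DEWPOINT_C_250_m", "T_DEWPOINT_C_500_m"],
    "UGRD_m/s": ["UGRD_m/s_0_m", "UGRD_m/s_250_m", "UGRD_m/s_500_m"],
    "VGRD_m/s": ["VGRD_m/s_0_m", "VGRD_m/s_250_m", "VGRD_m/s_500_m"],
    "output_features": ["rain", "snow", "sleet", "frz-rain"],
}

input_features = []
for features in conf["input_features"]:
    input_features += conf[features]  # flatten the grouped columns into one list
output_features = conf["output_features"]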
82 changes: 12 additions & 70 deletions applications/train_classifier_ptype.py
@@ -15,10 +15,15 @@
 from tensorflow.keras import backend as K
 from argparse import ArgumentParser
 
-from ptype.callbacks import MetricsCallback
-from ptype.data import load_ptype_data_day, preprocess_data
-from sklearn.model_selection import GroupShuffleSplit
+try:
+    from ptype.callbacks import MetricsCallback
+except ImportError:
+    import subprocess
+    subprocess.run(['pip', 'install', 'git+https://github.com/ai2es/ptype-physical.git'], check=True)
+    from ptype.callbacks import MetricsCallback
+from ptype.data import load_ptype_uq, preprocess_data
 
+from sklearn.model_selection import GroupShuffleSplit
 from evml.keras.callbacks import get_callbacks, ReportEpoch
 from evml.keras.models import CategoricalDNN
 from evml.pbs import launch_pbs_jobs
@@ -31,69 +36,6 @@
 logger = logging.getLogger(__name__)
 
 
-def load_ptype_uq(conf, data_split=0, verbose=0, drop_mixed=False):
-
-    # Load
-    df = pd.read_parquet(conf["data_path"])
-
-    # Drop mixed cases
-    if drop_mixed:
-        logger.info("Dropping data points with mixed observations")
-        c1 = df["ra_percent"] == 1.0
-        c2 = df["sn_percent"] == 1.0
-        c3 = df["pl_percent"] == 1.0
-        c4 = df["fzra_percent"] == 1.0
-        condition = c1 | c2 | c3 | c4
-        df = df[condition].copy()
-
-    # QC-Filter
-    qc_value = str(conf["qc"])
-    cond1 = df[f"wetbulb{qc_value}_filter"] == 0.0
-    cond2 = df["usa"] == 1.0
-    dg = df[cond1 & cond2].copy()
-
-    dg["day"] = dg["datetime"].apply(lambda x: str(x).split(" ")[0])
-    dg["id"] = range(dg.shape[0])
-
-    # Select test cases
-    test_days_c1 = dg["day"].isin(
-        [day for case in conf["case_studies"].values() for day in case]
-    )
-    test_days_c2 = dg["day"] >= conf["test_cutoff"]
-    test_condition = test_days_c1 | test_days_c2
-
-    # Partition the data into trainable-only and test-only splits
-    train_data = dg[~test_condition].copy()
-    test_data = dg[test_condition].copy()
-
-    # Make N train-valid splits using day as grouping variable, return "data_split" split
-    gsp = GroupShuffleSplit(
-        n_splits=conf["ensemble"]["n_splits"],
-        random_state=conf["seed"],
-        train_size=conf["train_size1"],
-    )
-    splits = list(gsp.split(train_data, groups=train_data["day"]))
-
-    train_index, valid_index = splits[data_split]
-    train_data, valid_data = (
-        train_data.iloc[train_index].copy(),
-        train_data.iloc[valid_index].copy(),
-    )
-
-    size = df.shape[0]
-    logger.info("Train, validation, and test fractions:")
-    logger.info(
-        f"{train_data.shape[0]/size}, {valid_data.shape[0]/size}, {test_data.shape[0]/size}"
-    )
-    print(
-        f"{train_data.shape[0]/size}, {valid_data.shape[0]/size}, {test_data.shape[0]/size}"
-    )
-
-    data = {"train": train_data, "val": valid_data, "test": test_data}
-
-    return data
-
-
 class Objective(BaseObjective):
     def __init__(self, config, metric="val_loss"):
 
@@ -144,10 +86,10 @@ def custom_updates(self, trial, conf):
 
 
 def trainer(conf, evaluate=True, data_split=0, mc_forward_passes=0):
-    input_features = (
-        conf["TEMP_C"] + conf["T_DEWPOINT_C"] + conf["UGRD_m/s"] + conf["VGRD_m/s"]
-    )
-    output_features = conf["ptypes"]
+    input_features = []
+    for features in conf["input_features"]:
+        input_features += conf[features]
+    output_features = conf["output_features"]
     metric = conf["metric"]
     # flag for using the evidential model
    if conf["model"]["loss"] == "dirichlet":
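
A hedged sketch of how the pieces above could be driven to build the ensemble, assuming trainer keeps the signature shown in this hunk and that conf["ensemble"]["n_splits"] counts the train/validation splits; this driver loop is not part of the commit itself.

# Train one model per GroupShuffleSplit fold; the resulting members are what
# a predict_ensemble call would later aggregate.
results = [
    trainer(conf, evaluate=True, data_split=split)
    for split in range(conf["ensemble"]["n_splits"])
]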
