Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/jack-mcivor/ngboost into feature/py311
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jack McIvor committed Jan 2, 2024
2 parents f948c66 + 07734dd commit fca60c1
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 15 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,15 @@ Probabilistic regression example on the Boston housing dataset:
```python
from ngboost import NGBRegressor

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X, Y = load_boston(True)
#Load Boston housing dataset
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
Y = raw_df.values[1::2, 2]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

ngb = NGBRegressor().fit(X_train, Y_train)
Expand Down
4 changes: 4 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# RELEASE NOTES

## Version 0.4.2
* Fix deprecated numpy type alias. This was causing a warning with NumPy >=1.20 and an error with NumPy >=1.24
* Remove pandas as a declared dependency

## Version 0.4.1
### Added `partial_fit` method for incremental learning

Expand Down
9 changes: 7 additions & 2 deletions examples/regression.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
from sklearn.datasets import load_boston
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from ngboost import NGBRegressor
from ngboost.distns import Normal

if __name__ == "__main__":
# Load Boston housing dataset
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
Y = raw_df.values[1::2, 2]

X, Y = load_boston(return_X_y=True)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

ngb = NGBRegressor(Dist=Normal).fit(X_train, Y_train)
Expand Down
8 changes: 6 additions & 2 deletions examples/survival.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import numpy as np
from sklearn.datasets import load_boston
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from ngboost import NGBSurvival
from ngboost.distns import LogNormal

if __name__ == "__main__":
# Load Boston housing dataset
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
Y = raw_df.values[1::2, 2]

X, Y = load_boston(return_X_y=True)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# introduce administrative censoring
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ngboost"
version = "0.4.1dev"
version = "0.4.2dev"
description = "Library for probabilistic predictions via gradient boosting."
authors = ["Stanford ML Group <[email protected]>"]
readme = "README.md"
Expand Down
38 changes: 30 additions & 8 deletions tests/test_distns.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import numpy as np
import pytest
from sklearn.datasets import fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

from ngboost import NGBClassifier, NGBRegressor, NGBSurvival
Expand Down Expand Up @@ -29,6 +31,26 @@
Tuple4Array = Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
Tuple5Array = Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]

# pylint: disable=redefined-outer-name
@pytest.fixture(scope="module")
def regression_data():
    """Provide a train/test split of California housing data for regression tests.

    Only the first 1000 rows of the dataset are kept. The split holds out
    20% of those rows for testing and uses a fixed ``random_state`` so every
    test in the module sees the same partition.

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)``.
    """
    dataset = fetch_california_housing()
    features = dataset["data"][:1000]
    targets = dataset["target"][:1000]
    # train_test_split yields [X_train, X_test, y_train, y_test]; expose it as a tuple.
    return tuple(
        train_test_split(features, targets, test_size=0.2, random_state=42)
    )


@pytest.fixture(scope="module")
def classification_data():
    """Provide a train/test split of breast cancer data for classification tests.

    At most the first 1000 rows of the dataset are kept. The split holds out
    20% of those rows for testing and uses a fixed ``random_state`` so every
    test in the module sees the same partition.

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)``.
    """
    dataset = load_breast_cancer()
    features = dataset["data"][:1000]
    labels = dataset["target"][:1000]
    # train_test_split yields [X_train, X_test, y_train, y_test]; expose it as a tuple.
    return tuple(
        train_test_split(features, labels, test_size=0.2, random_state=42)
    )


@pytest.mark.slow
@pytest.mark.parametrize(
Expand All @@ -42,8 +64,8 @@
DecisionTreeRegressor(criterion="friedman_mse", max_depth=5),
],
)
def test_dists_runs_on_examples_logscore(dist: Distn, learner, california_housing_data):
X_train, X_test, y_train, y_test = california_housing_data
def test_dists_runs_on_examples_logscore(dist: Distn, learner, regression_data):
X_train, X_test, y_train, y_test = regression_data
# TODO: test early stopping features
ngb = NGBRegressor(Dist=dist, Score=LogScore, Base=learner, verbose=False)
ngb.fit(X_train, y_train)
Expand All @@ -61,8 +83,8 @@ def test_dists_runs_on_examples_logscore(dist: Distn, learner, california_housin
DecisionTreeRegressor(criterion="friedman_mse", max_depth=5),
],
)
def test_dists_runs_on_examples_crpscore(dist: Distn, learner, california_housing_data):
X_train, X_test, y_train, y_test = california_housing_data
def test_dists_runs_on_examples_crpscore(dist: Distn, learner, regression_data):
X_train, X_test, y_train, y_test = regression_data
# TODO: test early stopping features
ngb = NGBRegressor(Dist=dist, Score=CRPScore, Base=learner, verbose=False)
ngb.fit(X_train, y_train)
Expand Down Expand Up @@ -106,8 +128,8 @@ def test_survival_runs_on_examples(
DecisionTreeRegressor(criterion="friedman_mse", max_depth=3),
],
)
def test_bernoulli(learner, breast_cancer_data: Tuple4Array):
X_cls_train, X_cls_test, Y_cls_train, Y_cls_test = breast_cancer_data
def test_bernoulli(learner, classification_data: Tuple4Array):
X_cls_train, X_cls_test, Y_cls_train, Y_cls_test = classification_data
# test early stopping features
# test other args, n_trees, LR, minibatching- args as fixture
ngb = NGBClassifier(Dist=Bernoulli, Score=LogScore, Base=learner, verbose=False)
Expand All @@ -127,8 +149,8 @@ def test_bernoulli(learner, breast_cancer_data: Tuple4Array):
DecisionTreeRegressor(criterion="friedman_mse", max_depth=3),
],
)
def test_categorical(k: int, learner, breast_cancer_data: Tuple4Array):
X_train, X_test, y_train, _ = breast_cancer_data
def test_categorical(k: int, learner, classification_data: Tuple4Array):
X_train, X_test, y_train, _ = classification_data
dist = k_categorical(k)
y_train = np.random.randint(0, k, (len(y_train)))
# test early stopping features
Expand Down

0 comments on commit fca60c1

Please sign in to comment.