Skip to content

Commit

Permalink
fix builds by supporting sparse_output renamed parameter in OneHotEncoder in new scikit-learn version update (#2507)
Browse files: browse the repository at this point in the history
  • Loading branch information
imatiach-msft authored Jan 26, 2024
1 parent f1cf49c commit 66e33cb
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 7 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/CI-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ jobs:
pip install -v -e .
working-directory: ${{ matrix.packageDirectory }}

- if: ${{ (matrix.packageDirectory == 'erroranalysis') || (matrix.packageDirectory == 'responsibleai') }}
name: Install rai_test_utils locally until next version is released
run: |
pip install -v -e .
working-directory: rai_test_utils

- name: Pip freeze
run: |
pip freeze > installed-requirements-dev.txt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,19 @@
"metadata": {},
"outputs": [],
"source": [
"from packaging import version\n",
"import sklearn\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"\n",
"# for older scikit-learn versions use sparse, for newer sparse_output:\n",
"if version.parse(sklearn.__version__) < version.parse('1.2'):\n",
" ohe_params = {\"sparse\": False}\n",
"else:\n",
" ohe_params = {\"sparse_output\": False}\n",
"\n",
"def split_label(dataset):\n",
" X = dataset.drop(['income'], axis=1)\n",
" y = dataset[['income']]\n",
Expand All @@ -141,7 +149,7 @@
" ])\n",
" cat_pipe = Pipeline([\n",
" ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),\n",
" ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))\n",
" ('cat_encoder', OneHotEncoder(handle_unknown='ignore', **ohe_params))\n",
" ])\n",
" feat_pipe = ColumnTransformer([\n",
" ('num_pipe', num_pipe, pipe_cfg['num_cols']),\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,20 @@
"metadata": {},
"outputs": [],
"source": [
"from packaging import version\n",
"from raiutils.dataset import fetch_dataset\n",
"import sklearn\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"\n",
"# for older scikit-learn versions use sparse, for newer sparse_output:\n",
"if version.parse(sklearn.__version__) < version.parse('1.2'):\n",
" ohe_params = {\"sparse\": False}\n",
"else:\n",
" ohe_params = {\"sparse_output\": False}\n",
"\n",
"def split_label(dataset, target_feature):\n",
" X = dataset.drop([target_feature], axis=1)\n",
" y = dataset[[target_feature]]\n",
Expand All @@ -93,7 +101,7 @@
" ])\n",
" cat_pipe = Pipeline([\n",
" ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),\n",
" ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))\n",
" ('cat_encoder', OneHotEncoder(handle_unknown='ignore', **ohe_params))\n",
" ])\n",
" feat_pipe = ColumnTransformer([\n",
" ('num_pipe', num_pipe, pipe_cfg['num_cols']),\n",
Expand Down Expand Up @@ -179,7 +187,7 @@
"source": [
"To use Responsible AI Dashboard, initialize a RAIInsights object upon which different components can be loaded.\n",
"\n",
"RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.",
"RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.\n",
"\n",
"You may also create the `FeatureMetadata` container, identify any feature of your choice as the `identity_feature`, specify a list of strings of categorical feature names via the `categorical_features` parameter, and specify dropped features via the `dropped_features` parameter. The `FeatureMetadata` may also be passed into the `RAIInsights`."
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,20 @@
"metadata": {},
"outputs": [],
"source": [
"from packaging import version\n",
"from raiutils.dataset import fetch_dataset\n",
"import sklearn\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"\n",
"# for older scikit-learn versions use sparse, for newer sparse_output:\n",
"if version.parse(sklearn.__version__) < version.parse('1.2'):\n",
" ohe_params = {\"sparse\": False}\n",
"else:\n",
" ohe_params = {\"sparse_output\": False}\n",
"\n",
"def split_label(dataset, target_feature):\n",
" X = dataset.drop([target_feature], axis=1)\n",
" y = dataset[[target_feature]]\n",
Expand All @@ -83,7 +91,7 @@
" ])\n",
" cat_pipe = Pipeline([\n",
" ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),\n",
" ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))\n",
" ('cat_encoder', OneHotEncoder(handle_unknown='ignore', **ohe_params))\n",
" ])\n",
" feat_pipe = ColumnTransformer([\n",
" ('num_pipe', num_pipe, pipe_cfg['num_cols']),\n",
Expand Down Expand Up @@ -148,7 +156,7 @@
"source": [
"To use Responsible AI Dashboard, initialize a RAIInsights object upon which different components can be loaded.\n",
"\n",
"RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.",
"RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.\n",
"\n",
"You may also create the `FeatureMetadata` container, identify any feature of your choice as the `identity_feature`, specify a list of strings of categorical feature names via the `categorical_features` parameter, and specify dropped features via the `dropped_features` parameter. The `FeatureMetadata` may also be passed into the `RAIInsights`."
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import numpy as np
import pandas as pd
import sklearn
from packaging import version
from sklearn import svm
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
Expand Down Expand Up @@ -127,6 +129,11 @@ def conv(X):
(conv(np.prod(x, axis=1)).reshape(-1, 1),
conv(np.prod(x, axis=1)**2).reshape(-1, 1))
))
# for older scikit-learn versions use sparse, for newer sparse_output:
if version.parse(sklearn.__version__) < version.parse('1.2'):
ohe_params = {"sparse": False}
else:
ohe_params = {"sparse_output": False}
transformations = ColumnTransformer([
("age_fare_1", Pipeline(steps=[
('imputer', SimpleImputer(strategy='median')),
Expand All @@ -137,8 +144,8 @@ def conv(X):
("embarked", Pipeline(steps=[
("imputer",
SimpleImputer(strategy='constant', fill_value='missing')),
("encoder", OneHotEncoder(sparse=False))]), ["embarked"]),
("sex_pclass", OneHotEncoder(sparse=False), ["sex", "pclass"])
("encoder", OneHotEncoder(**ohe_params))]), ["embarked"]),
("sex_pclass", OneHotEncoder(**ohe_params), ["sex", "pclass"])
])
clf = Pipeline(steps=[('preprocessor', transformations),
('classifier',
Expand Down

0 comments on commit 66e33cb

Please sign in to comment.