Skip to content

Commit

Permalink
more methods on effect scaler
Browse files (browse the repository at this point in the history)
  • Loading branch information
JohnMount committed Sep 12, 2022
1 parent 20c1163 commit 300cf95
Show file tree
Hide file tree
Showing 9 changed files with 113 additions and 34 deletions.
6 changes: 3 additions & 3 deletions coverage.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ Name Stmts Miss Cover
-----------------------------------------------------
pkg/vtreat/__init__.py 6 0 100%
pkg/vtreat/cross_plan.py 49 1 98%
pkg/vtreat/effect_scaler.py 54 2 96%
pkg/vtreat/effect_scaler.py 59 4 93%
pkg/vtreat/stats_utils.py 132 0 100%
pkg/vtreat/transform.py 14 0 100%
pkg/vtreat/util.py 140 7 95%
pkg/vtreat/vtreat_api.py 285 34 88%
pkg/vtreat/vtreat_db_adapter.py 69 0 100%
pkg/vtreat/vtreat_impl.py 703 58 92%
-----------------------------------------------------
TOTAL 1452 102 93%
TOTAL 1457 104 93%


======================== 42 passed in 91.89s (0:01:31) =========================
======================== 42 passed in 96.65s (0:01:36) =========================
2 changes: 1 addition & 1 deletion docs/search.js

Large diffs are not rendered by default.

97 changes: 80 additions & 17 deletions docs/vtreat/effect_scaler.html

Large diffs are not rendered by default.

19 changes: 14 additions & 5 deletions pkg/build/lib/vtreat/effect_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self):
self._clear()

# noinspection PyPep8Naming
def fit(self, X, y) -> None:
def fit(self, X, y, sample_weight=None) -> None:
"""
Get per-variable effect scaling of (X[:, i] - np.mean(X[:, i])) -> (y - np.mean(y)).
See https://win-vector.com/2022/09/08/y-aware-pca/
Expand All @@ -28,7 +28,7 @@ def fit(self, X, y) -> None:
:param y: dependent values
:return: self for method chaining
"""
self._clear()
assert sample_weight is None # TODO: implement non-None case
assert len(X.shape) == 2
assert X.shape[0] > 0
assert X.shape[1] > 0
Expand All @@ -37,9 +37,10 @@ def fit(self, X, y) -> None:
y_sq = np.dot(y, y)
assert len(y.shape) == 1
assert y.shape[0] == X.shape[0]
self._clear()
self._n_columns = X.shape[1]
self._x_means = np.zeros(self._n_columns)
self._x_scales = np.zeros(self._n_columns) + 1.0
self._x_scales = np.zeros(self._n_columns)

def calc_mean_and_scale(i: int, *, xi: np.ndarray) -> None:
self._x_means[i] = np.mean(xi)
Expand Down Expand Up @@ -86,9 +87,13 @@ def transform_col(i: int, *, xi: np.ndarray) -> np.ndarray:
return pd.DataFrame({
i: transform_col(i, xi=np.array(X[:, i], float)) for i in range(X.shape[1])
})

# noinspection PyPep8Naming
def predict(self, X) -> pd.DataFrame:
return self.transform(X)

# noinspection PyPep8Naming
def fit_transform(self, X, y) -> pd.DataFrame:
def fit_transform(self, X, y, sample_weight=None) -> pd.DataFrame:
"""
Fit and transform combined. Not computed out of sample.
Expand All @@ -97,5 +102,9 @@ def fit_transform(self, X, y) -> pd.DataFrame:
:return: transformed data
"""

self.fit(X, y)
self.fit(X, y, sample_weight=sample_weight)
return self.transform(X)

# noinspection PyPep8Naming
def fit_predict(self, X, y, sample_weight=None) -> pd.DataFrame:
return self.fit_transform(X, y, sample_weight=sample_weight)
Binary file modified pkg/dist/vtreat-1.2.5-py3-none-any.whl
Binary file not shown.
Binary file modified pkg/dist/vtreat-1.2.5.tar.gz
Binary file not shown.
4 changes: 1 addition & 3 deletions pkg/vtreat.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@ Platform: any
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: License :: OSI Approved :: BSD License
Requires-Python: >=3.5.3
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE

Expand Down
2 changes: 1 addition & 1 deletion pkg/vtreat.egg-info/requires.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ numpy
pandas
scipy
sklearn
data_algebra>=1.3.1
data_algebra>=1.4.1
17 changes: 13 additions & 4 deletions pkg/vtreat/effect_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self):
self._clear()

# noinspection PyPep8Naming
def fit(self, X, y) -> None:
def fit(self, X, y, sample_weight=None) -> None:
"""
Get per-variable effect scaling of (X[:, i] - np.mean(X[:, i])) -> (y - np.mean(y)).
See https://win-vector.com/2022/09/08/y-aware-pca/
Expand All @@ -28,7 +28,7 @@ def fit(self, X, y) -> None:
:param y: dependent values
:return: self for method chaining
"""
self._clear()
assert sample_weight is None # TODO: implement non-None case
assert len(X.shape) == 2
assert X.shape[0] > 0
assert X.shape[1] > 0
Expand All @@ -37,6 +37,7 @@ def fit(self, X, y) -> None:
y_sq = np.dot(y, y)
assert len(y.shape) == 1
assert y.shape[0] == X.shape[0]
self._clear()
self._n_columns = X.shape[1]
self._x_means = np.zeros(self._n_columns)
self._x_scales = np.zeros(self._n_columns)
Expand Down Expand Up @@ -86,9 +87,13 @@ def transform_col(i: int, *, xi: np.ndarray) -> np.ndarray:
return pd.DataFrame({
i: transform_col(i, xi=np.array(X[:, i], float)) for i in range(X.shape[1])
})

# noinspection PyPep8Naming
def predict(self, X) -> pd.DataFrame:
return self.transform(X)

# noinspection PyPep8Naming
def fit_transform(self, X, y) -> pd.DataFrame:
def fit_transform(self, X, y, sample_weight=None) -> pd.DataFrame:
"""
Fit and transform combined. Not computed out of sample.
Expand All @@ -97,5 +102,9 @@ def fit_transform(self, X, y) -> pd.DataFrame:
:return: transformed data
"""

self.fit(X, y)
self.fit(X, y, sample_weight=sample_weight)
return self.transform(X)

# noinspection PyPep8Naming
def fit_predict(self, X, y, sample_weight=None) -> pd.DataFrame:
return self.fit_transform(X, y, sample_weight=sample_weight)

0 comments on commit 300cf95

Please sign in to comment.