Skip to content

Commit

Permalink
rebuild and recheck
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnMount committed Apr 29, 2021
1 parent a837c22 commit a222c7a
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 31 deletions.
44 changes: 22 additions & 22 deletions coverage.txt
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
============================= test session starts ==============================
platform darwin -- Python 3.7.7, pytest-6.2.2, py-1.10.0, pluggy-0.13.1
platform darwin -- Python 3.9.4, pytest-6.2.3, py-1.10.0, pluggy-0.13.1
rootdir: /Users/johnmount/Documents/work/pyvtreat/pkg
plugins: anyio-2.2.0, cov-2.11.1
collected 27 items
collected 28 items

pkg/tests/test_ai200_examples.py .. [ 7%]
pkg/tests/test_classification.py .. [ 14%]
pkg/tests/test_col_name_issues.py ... [ 25%]
pkg/tests/test_dup.py . [ 29%]
pkg/tests/test_id_column_check.py . [ 33%]
pkg/tests/test_imputation_controls.py . [ 37%]
pkg/tests/test_multinomial.py . [ 40%]
pkg/tests/test_nan_inf.py . [ 44%]
pkg/tests/test_outcome_name_required.py . [ 48%]
pkg/tests/test_pipeparams.py . [ 51%]
pkg/tests/test_r1_issue.py . [ 55%]
pkg/tests/test_range.py . [ 59%]
pkg/tests/test_regression.py . [ 62%]
pkg/tests/test_result_restriction.py . [ 66%]
pkg/tests/test_classification.py ... [ 17%]
pkg/tests/test_col_name_issues.py ... [ 28%]
pkg/tests/test_dup.py . [ 32%]
pkg/tests/test_id_column_check.py . [ 35%]
pkg/tests/test_imputation_controls.py . [ 39%]
pkg/tests/test_multinomial.py . [ 42%]
pkg/tests/test_nan_inf.py . [ 46%]
pkg/tests/test_outcome_name_required.py . [ 50%]
pkg/tests/test_pipeparams.py . [ 53%]
pkg/tests/test_r1_issue.py . [ 57%]
pkg/tests/test_range.py . [ 60%]
pkg/tests/test_regression.py . [ 64%]
pkg/tests/test_result_restriction.py . [ 67%]
pkg/tests/test_stats.py ..... [ 85%]
pkg/tests/test_unsup_perf.py . [ 88%]
pkg/tests/test_unsup_perf.py . [ 89%]
pkg/tests/test_unsupervised.py . [ 92%]
pkg/tests/test_user_coders.py . [ 96%]
pkg/tests/test_util.py . [100%]

---------- coverage: platform darwin, python 3.7.7-final-0 -----------
---------- coverage: platform darwin, python 3.9.4-final-0 -----------
Name Stmts Miss Cover
-----------------------------------------------
pkg/vtreat/__init__.py 6 0 100%
pkg/vtreat/cross_plan.py 50 11 78%
pkg/vtreat/stats_utils.py 101 4 96%
pkg/vtreat/transform.py 17 4 76%
pkg/vtreat/util.py 142 19 87%
pkg/vtreat/vtreat_api.py 285 57 80%
pkg/vtreat/vtreat_impl.py 636 85 87%
pkg/vtreat/util.py 145 20 86%
pkg/vtreat/vtreat_api.py 285 56 80%
pkg/vtreat/vtreat_impl.py 639 89 86%
-----------------------------------------------
TOTAL 1237 180 85%
TOTAL 1243 184 85%


============================= 27 passed in 19.97s ==============================
============================= 28 passed in 21.90s ==============================
6 changes: 5 additions & 1 deletion pkg/build/lib/vtreat/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
def safe_to_numeric_array(x):
    """Convert x to a one-dimensional numpy array of dtype float.

    Work around for https://github.com/WinVector/pyvtreat/issues/7

    :param x: values accepted by pandas.Series() and convertible to float
    :return: numpy.ndarray of dtype float, for every input dtype
    """
    # noinspection PyTypeChecker
    x = pandas.Series(x)
    if pandas.api.types.is_float_dtype(x):
        # adding zero converts -0 to 0 in some cases, hence this early exit;
        # still hand back an ndarray so the return type does not depend on
        # the dtype of the input
        return numpy.asarray(x, dtype=float)
    return numpy.asarray(x + 0.0, dtype=float)


def can_convert_v_to_numeric(x):
Expand Down
10 changes: 5 additions & 5 deletions pkg/build/lib/vtreat/vtreat_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def vtreat_parameters(user_params=None):
"prevalence_code",
},
"filter_to_recommended": True,
"indicator_min_fraction": 0.1,
"indicator_min_fraction": 0.02,
"cross_validation_plan": vtreat.cross_plan.KWayCrossPlanYStratified(),
"cross_validation_k": 5,
"user_transforms": [],
Expand Down Expand Up @@ -127,7 +127,7 @@ def transform(self, X):
"possibly called transform on same data used to fit\n" +
"(this causes over-fit, please use fit_transform() instead)")
res = vtreat_impl.pre_prep_frame(
X, col_list=self.var_list_, cols_to_copy=self.cols_to_copy_
X, col_list=self.var_list_, cols_to_copy=self.cols_to_copy_, cat_cols=self.plan_['cat_list']
)
res = vtreat_impl.perform_transform(x=res, transform=self, params=self.params_)
res = vtreat_impl.limit_to_appropriate_columns(res=res, transform=self)
Expand Down Expand Up @@ -274,7 +274,7 @@ def transform(self, X):
"possibly called transform on same data used to fit\n" +
"(this causes over-fit, please use fit_transform() instead)")
X = vtreat_impl.pre_prep_frame(
X, col_list=self.var_list_, cols_to_copy=self.cols_to_copy_
X, col_list=self.var_list_, cols_to_copy=self.cols_to_copy_, cat_cols=self.plan_['cat_list']
)
res = vtreat_impl.perform_transform(x=X, transform=self, params=self.params_)
res = vtreat_impl.limit_to_appropriate_columns(res=res, transform=self)
Expand Down Expand Up @@ -421,7 +421,7 @@ def transform(self, X):
"possibly called transform on same data used to fit\n" +
"(this causes over-fit, please use fit_transform() instead)")
X = vtreat_impl.pre_prep_frame(
X, col_list=self.var_list_, cols_to_copy=self.cols_to_copy_
X, col_list=self.var_list_, cols_to_copy=self.cols_to_copy_, cat_cols=self.plan_['cat_list']
)
res = vtreat_impl.perform_transform(x=X, transform=self, params=self.params_)
res = vtreat_impl.limit_to_appropriate_columns(res=res, transform=self)
Expand Down Expand Up @@ -558,7 +558,7 @@ def transform(self, X):
if self.last_fit_x_id_ is None:
raise ValueError("called transform on not yet fit treatment")
X = vtreat_impl.pre_prep_frame(
X, col_list=self.var_list_, cols_to_copy=self.cols_to_copy_
X, col_list=self.var_list_, cols_to_copy=self.cols_to_copy_, cat_cols=self.plan_['cat_list']
)
res = vtreat_impl.perform_transform(x=X, transform=self, params=self.params_)
res = vtreat_impl.limit_to_appropriate_columns(res=res, transform=self)
Expand Down
17 changes: 14 additions & 3 deletions pkg/build/lib/vtreat/vtreat_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,8 @@ def fit_numeric_outcome_treatment(
return {
"outcome_name": outcome_name,
"cols_to_copy": cols_to_copy,
"num_list": num_list,
"cat_list": cat_list,
"xforms": xforms,
}

Expand Down Expand Up @@ -448,6 +450,8 @@ def fit_binomial_outcome_treatment(
return {
"outcome_name": outcome_name,
"cols_to_copy": cols_to_copy,
"num_list": num_list,
"cat_list": cat_list,
"xforms": xforms,
}

Expand Down Expand Up @@ -533,6 +537,8 @@ def fit_multinomial_outcome_treatment(
return {
"outcome_name": outcome_name,
"cols_to_copy": cols_to_copy,
"num_list": num_list,
"cat_list": cat_list,
"xforms": xforms,
}

Expand Down Expand Up @@ -597,11 +603,13 @@ def fit_unsupervised_treatment(*, X, var_list, outcome_name, cols_to_copy, param
return {
"outcome_name": outcome_name,
"cols_to_copy": cols_to_copy,
"num_list": num_list,
"cat_list": cat_list,
"xforms": xforms,
}


def pre_prep_frame(x, *, col_list, cols_to_copy):
def pre_prep_frame(x, *, col_list, cols_to_copy, cat_cols=None):
"""Create a copy of pandas.DataFrame x restricted to col_list union cols_to_copy with col_list - cols_to_copy
converted to only string and numeric types. New pandas.DataFrame has trivial indexing. If col_list
is empty it is interpreted as all columns."""
Expand All @@ -624,15 +632,18 @@ def pre_prep_frame(x, *, col_list, cols_to_copy):
raise ValueError("no variables")
x = x.loc[:, col_list]
x = x.reset_index(inplace=False, drop=True)
cat_col_set = None
if cat_cols is not None:
cat_col_set = set(cat_cols)
for c in x.columns:
if c in cset:
continue
bad_ind = vtreat.util.is_bad(x[c])
if vtreat.util.can_convert_v_to_numeric(x[c]):
if ((cat_col_set is None) or (c not in cat_col_set)) and vtreat.util.can_convert_v_to_numeric(x[c]):
x[c] = vtreat.util.safe_to_numeric_array(x[c])
else:
# https://stackoverflow.com/questions/22231592/pandas-change-data-type-of-series-to-string
x[c] = numpy.asarray(x[c].apply(str), dtype=str)
x[c] = x[c].astype(str)
x.loc[bad_ind, c] = numpy.nan
return x

Expand Down
Binary file modified pkg/dist/vtreat-0.4.9-py3-none-any.whl
Binary file not shown.
Binary file modified pkg/dist/vtreat-0.4.9.tar.gz
Binary file not shown.

0 comments on commit a222c7a

Please sign in to comment.