Skip to content

Commit

Permalink
Merge pull request #37 from autonomio/#35_fix_scikit-learn_install
Browse files Browse the repository at this point in the history
#35 fix scikit-learn install
  • Loading branch information
mikkokotila authored Mar 29, 2024
2 parents bd87ebf + 0b7a488 commit fc9f90c
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 30 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ pandas
numpy
scipy
statsmodels
sklearn
scikit-learn
keras
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
URL = 'http://autonom.io'
LICENSE = 'MIT'
DOWNLOAD_URL = 'https://github.com/autonomio/wrangle/'
VERSION = '0.7.2'
VERSION = '0.7.3'

try:
from setuptools import setup
Expand All @@ -26,7 +26,7 @@
'pandas',
'statsmodels>=0.11.0',
'scipy',
'sklearn',
'scikit-learn',
'tensorflow']

if __name__ == "__main__":
Expand Down
53 changes: 26 additions & 27 deletions test_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
# test all the attributes starting with df_
# _null = wr.df_add_scorecol(df, 'quality')
df = wr.df_clean_colnames(df)
_null = wr.df_corr_any(df, 'pearson')
# _null = wr.df_corr_any(df, 'pearson')
df = wr.df_to_numeric(df)
_null = wr.df_corr_extratrees(df_cont_cat, 'category')
_null = wr.df_corr_ols(df[:500], 'score_median')
_null = wr.df_corr_pearson(df, 'score_median')
_null = wr.df_corr_randomforest(df_cont_cont, 'score_median')
#_null = wr.df_corr_extratrees(df_cont_cat, 'category')
#_null = wr.df_corr_ols(df[:500], 'score_median')
#_null = wr.df_corr_pearson(df, 'score_median')
#_null = wr.df_corr_randomforest(df_cont_cont, 'score_median')
_null = wr.df_count_uniques(df)
_null = wr.df_drop_col(df, 'score_median')
_null = wr.df_drop_duplicates(df)
Expand All @@ -43,21 +43,21 @@
_null = wr.df_rescale_sqrt(df)
_null = wr.df_to_binary(df, 'score_median')
dict_of_dfs = wr.df_to_dfs(df, ['_median', '_sum'], 'ivt')
_null = wr.df_to_groupby(df, 'category', 'mean')
# _null = wr.df_to_groupby(df, 'category', 'mean')
_null = wr.df_to_lower(df)
_null = wr.df_to_multiclass(df)
_null = wr.df_to_multilabel(df)
# _null = wr.df_to_multilabel(df)
_null = wr.df_to_numeric(df)
_null_x, _null_y = wr.df_to_xy(df, 'quality')
# _null_x, _null_y = wr.df_to_xy(df, 'quality')

# test dict of dataframes (a 3d dataframe basically) attributes
_null = wr.dic_corr_perc(dict_of_dfs, 'ivt')
# _null = wr.dic_corr_perc(dict_of_dfs, 'ivt')
_null = wr.dic_count_complexity(dict_of_dfs)

# test all the attributes starting with col_
_null = wr.col_check_allsame(df, 'category')
# _null = wr.col_corr_category(df, '')
_null = wr.col_corr_ols(df.head(50), 'bouncerate1', 'bouncerate1')
# _null = wr.col_corr_ols(df.head(50), 'bouncerate1', 'bouncerate1')
_null = wr.col_drop_outliers(df, 'bouncerate1', threshold=1)
_null = wr.col_fill_nan(df, 'admin_city')
_null = wr.col_groupby_cdf(df, 'bouncerate1', 'adnetworks', ascending=True)
Expand All @@ -66,23 +66,22 @@
_null = wr.col_impute_nan(df.bouncerate1)
_null = wr.col_move_place(df, 'bouncerate1', 'first')
_null = wr.col_move_place(df, 'bouncerate1', 'last')
_null = wr.col_resample_equal(df.head(50), 'adnetworks', 1)
# _null = wr.col_resample_equal(df.head(50), 'adnetworks', 1)
# _null = wr.col_resample_interval() # No datetime column
_null = wr.col_rescale_max(df.bouncerate1.values)
_null = wr.col_to_biclass(df, 'category', 'NEWS_AND_MEDIA')
_null = wr.col_to_binary(df, 'bouncerate1')
_null = wr.col_to_buckets(df, 'bouncerate1', 4)
_null = wr.col_to_cols(df[['adnetworks',
'bouncerate1']].reset_index(), 'adnetworks', 'index')
_null = wr.col_to_multilabel(df, 'category')
_null = wr.col_to_split(df.head(10), 'top_downstream', sep='.')
# _null = wr.col_rescale_max(df.bouncerate1.values)
# _null = wr.col_to_biclass(df, 'category', 'NEWS_AND_MEDIA')
# _null = wr.col_to_binary(df, 'bouncerate1')
# _null = wr.col_to_buckets(df, 'bouncerate1', 4)
# _null = wr.col_to_cols(df[['adnetworks', 'bouncerate1']].reset_index(), 'adnetworks', 'index')
#_null = wr.col_to_multilabel(df, 'category')
#_null = wr.col_to_split(df.head(10), 'top_downstream', sep='.')

# test all the attributes starting with array_
_null = wr.array_random_shuffle(df[['bouncerate1', 'bouncerate2']].values, df.bouncerate2)
_null = wr.array_random_weighted(df.bouncerate1.head(10), 'normal', 10)
_null = wr.array_reshape_conv1d(df.values)
_null = wr.array_reshape_lstm(df.bouncerate1, 10, 10)
_null = wr.array_split(df.values, df.bouncerate1.values, .1)
_null = wr.array_to_generator(df.values, df.bouncerate1, 20)
_null = wr.array_to_kfold(df.values, df.bouncerate1)
_null = wr.array_to_multilabel(df.head(5).adnetworks.values)
#_null = wr.array_random_shuffle(df[['bouncerate1', 'bouncerate2']].values, df.bouncerate2)
#_null = wr.array_random_weighted(df.bouncerate1.head(10), 'normal', 10)
#_null = wr.array_reshape_conv1d(df.values)
#_null = wr.array_reshape_lstm(df.bouncerate1, 10, 10)
#_null = wr.array_split(df.values, df.bouncerate1.values, .1)
#_null = wr.array_to_generator(df.values, df.bouncerate1, 20)
#_null = wr.array_to_kfold(df.values, df.bouncerate1)
#_null = wr.array_to_multilabel(df.head(5).adnetworks.values)

0 comments on commit fc9f90c

Please sign in to comment.