# ascends-toolkit API reference (api_reference.md)
import numpy as np
import ascends as asc
import keras
import ast
from sklearn.metrics import classification_report
import pickle
# 1. Classification API reference

# * NOTE: Ascends-toolkit was developed to be used via command-line interface or web-based interface; however, if needed,
# users may use ascends-toolkit's API. The following shows an example of performing a classification task using 
# the core ascends-toolkit APIs. 

# You need to download the example data file from https://raw.githubusercontent.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv
# and save it under data folder

csv_file = 'data/iris.csv'   # see download note above for obtaining this file
cols_to_remove = []          # no columns are dropped before training
target_col = 'Name'          # column to predict (the iris species)
input_col = None             # None: use all remaining columns as model inputs

# Classifier will need a mapping between categorical values to numerical values
mapping = {'Name': {'Iris-setosa':0, 'Iris-versicolor':1, 'Iris-virginica':2}}

# Load data from csv file
# A standard csv file can be loaded and shuffled as follows
# (random_state=0 makes the shuffle reproducible across runs)

data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, input_col, cols_to_remove, target_col, map_all = mapping, random_state = 0)
# check if data is loaded (bare expression: displays the first 10 rows in a notebook)
data_df[:10]
# Generate default model parameters for a classifier
model_parameters = asc.default_model_parameters_classifier() 
model_type = 'RF'            # random forest
scaler_option = 'StandardScaler' # scaler option can be 'False','StandardScaler','Normalizer','MinMaxScaler','RobustScaler'
num_of_folds = 5             # k for k-fold cross-validation
model = asc.define_model_classifier(model_type, model_parameters, x_header_size = x_train.shape[1])   
# scikit-learn's classification report can be used to understand the accuracy of the trained model
predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=num_of_folds)
accuracy = asc.evaluate_classifier(predictions, actual_values)
print("")
print("* Classification Report")
print(classification_report(actual_values, predictions))
* Classification Report
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        50
         1.0       0.94      0.92      0.93        50
         2.0       0.92      0.94      0.93        50

   micro avg       0.95      0.95      0.95       150
   macro avg       0.95      0.95      0.95       150
weighted avg       0.95      0.95      0.95       150
# Retrain on the full data and persist the classifier (with its scaler,
# column headers, and accuracy) to model.pkl in the current directory
asc.train_and_save_classifier(model, "model.pkl", model_type
                            , input_cols=header_x, target_col=header_y
                            , x_train=x_train, y_train=y_train, scaler_option=scaler_option, path_to_save = '.', accuracy=accuracy)
* Training initiated ..
* Training done.
* Trained model saved to file: model.pkl
# You can load the saved model by using pickle package
# NOTE(review): pickle.load on an untrusted file can execute arbitrary code;
# only unpickle model files you created yourself.
model_dict = pickle.load(open('model.pkl', 'rb'))

# Let's assume that we have an input as follows
# (four features, matching the iris input columns)
x_to_predict = [[4.5, 2.4, 1.2, 4.2]]

# You can scale the data using the loaded scaler
scaler = model_dict['fitted_scaler_x']
x_to_predict = scaler.transform(x_to_predict)
print("Scaled x_to_predict = ", x_to_predict)

# Making prediction can be done as follows
# NOTE(review): this calls the in-memory `model` trained above, not anything
# from the freshly loaded model_dict — presumably model_dict also contains the
# trained model; verify its key and use that when predicting in a new session.
predicted_y = model.predict(x_to_predict)

# Original prediction value will not be a class name, so you need to find out the class name by doing:
# (reverse lookup: find the class label whose numeric code matches the prediction)
for key in mapping['Name'].keys():
    if mapping['Name'][key]==predicted_y[0]:
        print("* Your model thinks that it's a ", key)
Scaled x_to_predict =  [[-1.62768837 -1.51337555 -1.45500383  3.94594202]]
* Your model thinks that it's a  Iris-setosa
# 2. Regression API reference

# * NOTE: Ascends-toolkit was developed to be used via command-line interface or web-based interface; however, if needed,
# users may use ascends-toolkit's API. The following shows an example of performing a regression task using 
# the core ascends-toolkit APIs

csv_file = 'data/BostonHousing.csv'   # Boston housing dataset
cols_to_remove = []                   # no columns are dropped
target_col = 'medv'                   # regression target (median home value)

# Load data from csv file
# A standard csv file can be loaded and shuffled as follows
# (second argument None: use every remaining column as an input feature)

data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, None, cols_to_remove, target_col)
# Performing correlation analysis
# Correlation analysis can be performed as follows
# fs_dict will only contain the top-k features for each criteria (e.g., PCC)
# final_report will contain the full evaluation scores for each feature

k = 10   # number of top-ranked features to keep per criterion
fs_dict, final_report = asc.correlation_analysis_all(data_df, target_col, top_k = k, file_to_save = None, save_chart = None)

print("Top-k features for each criteria")
print(fs_dict)
print("")
print("Full Correlation Analysis report")
print(final_report)

# To use top-k (k=10) features based on PCC (Pearson's correlation coefficient)

input_col = fs_dict['PCC']

# We need to load the file again, now restricted to the selected input columns
data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, input_col, cols_to_remove, target_col)
* correlation_analysis_all
Top-k features for each criteria
{'PCC': ['rm', 'zn', 'b', 'dis', 'chas', 'age', 'rad', 'crim', 'nox', 'tax'], 'PCC_SQRT': ['lstat', 'rm', 'ptratio', 'indus', 'tax', 'nox', 'crim', 'rad', 'age', 'zn'], 'MIC': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'zn'], 'MAS': ['chas', 'b', 'age', 'zn', 'rad', 'dis', 'nox', 'ptratio', 'crim', 'tax'], 'MEV': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'zn'], 'MCN': ['zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio'], 'MCN_general': ['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax'], 'GMIC': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'rad'], 'TIC': ['lstat', 'rm', 'nox', 'indus', 'ptratio', 'age', 'crim', 'tax', 'dis', 'rad']}

Full Correlation Analysis report
              MIC       MAS       MEV       MCN  MCN_general      GMIC  \
age      0.420689  0.099268  0.420689  5.321928          2.0  0.368688   
b        0.272469  0.112505  0.272469  5.321928          2.0  0.207560   
chas     0.133026  0.113481  0.133026  5.321928          2.0  0.079504   
crim     0.358757  0.044427  0.358757  5.000000          2.0  0.326953   
dis      0.315033  0.055968  0.315033  5.321928          2.0  0.282479   
indus    0.414140  0.039397  0.414140  5.321928          2.0  0.350791   
lstat    0.615427  0.034828  0.615427  5.321928          2.0  0.563114   
nox      0.442723  0.047576  0.442723  5.321928          2.0  0.390978   
ptratio  0.371581  0.045671  0.371581  5.321928          2.0  0.335871   
rad      0.278780  0.060237  0.278780  5.321928          2.0  0.238301   
rm       0.450967  0.038707  0.450967  5.321928          2.0  0.429243   
tax      0.324490  0.041496  0.324490  5.321928          2.0  0.287834   
zn       0.289734  0.098851  0.289734  5.321928          2.0  0.236065   

               TIC  PCC_SQRT       PCC  
age      21.160602  0.376955 -0.376955  
b        11.615744  0.333461  0.333461  
chas      4.074539  0.175260  0.175260  
crim     21.033617  0.388305 -0.388305  
dis      18.077608  0.249929  0.249929  
indus    23.199544  0.483725 -0.483725  
lstat    38.803088  0.737663 -0.737663  
nox      25.119383  0.427321 -0.427321  
ptratio  21.688436  0.507787 -0.507787  
rad      14.967824  0.381626 -0.381626  
rm       28.144318  0.695360  0.695360  
tax      19.708624  0.468536 -0.468536  
zn       12.802512  0.360445  0.360445  
# Generate default model parameters for a regressor
model_parameters = asc.default_model_parameters() 

# Model Training
model_type = 'RF' # model type can be 'LR','RF','NN','KR','BR','SVM'
scaler_option = 'StandardScaler' # scaler option can be 'False','StandardScaler','Normalizer','MinMaxScaler','RobustScaler'
num_of_folds = 5 # k for k-fold cross-validation
model = asc.define_model_regression(model_type, model_parameters, x_header_size = x_train.shape[1])
predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=num_of_folds)
MAE, R2 = asc.evaluate(predictions, actual_values)

# Printing the performance of regression task (mean absolute error and R-squared)
print("MAE = ", MAE,", R2 = ", R2)
MAE =  2.793895239226903 , R2 =  0.6994175646013248
# tuning hyper parameters
# (randomized search over n_iter=1000 candidate settings, each evaluated with
# num_of_folds-fold cross-validation; random_state=0 makes the search reproducible)
tuned_parameters = asc.hyperparameter_tuning(model_type, x_train, y_train
                                             , num_of_folds, scaler_option
                                           , n_iter=1000, random_state=0, verbose=1)
Fitting 5 folds for each of 1000 candidates, totalling 5000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    6.1s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:   41.6s
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 1234 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 1784 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 2434 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 3184 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed:  6.2min
[Parallel(n_jobs=-1)]: Done 4984 tasks      | elapsed:  7.7min
[Parallel(n_jobs=-1)]: Done 5000 out of 5000 | elapsed:  7.7min finished
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_search.py:841: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.
  DeprecationWarning)
# Model Training — retrain using the tuned hyperparameters found above
model_type = 'RF' # model type can be 'LR','RF','NN','KR','BR','SVM'
scaler_option = 'StandardScaler' # scaler option can be 'False','StandardScaler','Normalizer','MinMaxScaler','RobustScaler'
num_of_folds = 5 # k for k-fold cross-validation
# BUG FIX: the original passed `model_parameters` here, so the result of the
# hyperparameter_tuning() run above was silently discarded; pass
# `tuned_parameters` so the tuned settings are actually used.
model = asc.define_model_regression(model_type, tuned_parameters, x_header_size = x_train.shape[1])
predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=num_of_folds)
MAE, R2 = asc.evaluate(predictions, actual_values)

# Printing the performance of regression task (mean absolute error and R-squared)
print("MAE = ", MAE,", R2 = ", R2)
MAE =  2.8167252676854493 , R2 =  0.7039443187462722
# save prediction-actual result in a csv file
asc.save_test_data(predictions, actual_values, "result.csv")
# saving the trained model in a file (also records scaler, column headers,
# and the MAE/R2 metrics alongside the model)
asc.train_and_save(model, "trained_model", model_type
                        , input_cols=header_x, target_col=header_y
                        , x_train=x_train, y_train=y_train, scaler_option=scaler_option, path_to_save = '.', MAE=MAE, R2=R2)
* Training initiated ..
* Training done.
* Trained model saved to file: trained_model
# Model file loading and making a prediction can be done in the same way as the classification example