-
Notifications
You must be signed in to change notification settings - Fork 0
/
Classiifcation_Active_Learning.py
73 lines (52 loc) · 2.01 KB
/
Classiifcation_Active_Learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
##### ----Modules--------
import os
import pandas as pd
import pprint
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib notebook
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC
import joblib
# from sklearn.externals import joblib
# Show the platform identifier that drives the directory choice below.
print(os.name)

# Change directory: one candidate project folder per machine.
wrk_path_1 = r"C:\Users\calvi\OneDrive\Documents\2020\Liposomes Vitamins\LiposomeFormulation"
wrk_path_2 = r"C:\Users\Calvin\OneDrive\Documents\2020\Liposomes Vitamins\LiposomeFormulation"
wrk_path_3 = r"/Users/calvin/Documents/OneDrive/Documents/2020/Liposomes Vitamins/LiposomeFormulation"

if os.name != 'posix':
    # Non-POSIX platform: try the second Windows-style path first and fall
    # back to the first one if changing into it fails with an OSError.
    try:
        os.chdir(wrk_path_2)
        print("Utilising Home Pathway")
    except OSError:
        os.chdir(wrk_path_1)
        print("Utilising Lab Pathway")
else:
    # POSIX platform: only the third path is attempted, with no fallback.
    os.chdir(wrk_path_3)
    print("Utilising MacBook")
## Import Files
# Load the results table; the relative path is resolved against the current
# working directory.
datafile = pd.read_csv('Results_Complete.csv')

# Pairwise correlations over the numeric (and boolean) columns only.
# Since pandas 2.0 a bare DataFrame.corr() defaults to numeric_only=False and
# raises if the frame holds any text column; selecting the dtypes explicitly
# gives the same matrix on old and new pandas alike. The result is bound to a
# name so it can be inspected instead of being computed and thrown away.
correlation_matrix = datafile.select_dtypes(include=['number', 'bool']).corr()

# Column names as read, followed by the count of distinct column labels.
print(list(datafile.columns))
print("Number of Columns:", len(datafile.columns.unique()))
# Drop columns - not useful.
# Names of the columns removed from the loaded table.
drop_list = [
    'Duplicate_Check',
    'PdI Width (d.nm)',
    'PdI',
    'Z-Average (d.nm)',
]

# Remove the listed columns, then renumber the row index from zero.
datafile_red = datafile.drop(columns=drop_list)
datafile_red = datafile_red.reset_index(drop=True)

# Preview the first rows of the reduced table.
print(datafile_red.head())
# Target column information.
# Print the distinct values found in the target column, then the number of
# missing (NaN) entries it contains.
print(datafile_red['ES_Aggregation'].unique())
print(datafile_red['ES_Aggregation'].isna().sum())

# Keep only the rows that have a target value and renumber the index from zero.
datafile_cleaned = (
    datafile_red
    .dropna(subset=['ES_Aggregation'])
    .reset_index(drop=True)
)
print(datafile_cleaned)
print(datafile_cleaned['ES_Aggregation'].unique())

# Per-class counts, drawn as a bar chart and printed. Both are taken from
# datafile_red (the table before the NaN rows were removed).
ax = sns.countplot(data=datafile_red, x='ES_Aggregation')
print(datafile_red['ES_Aggregation'].value_counts())