-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecision_tree_classification_on_tumor_data.py
53 lines (36 loc) · 1.22 KB
/
decision_tree_classification_on_tumor_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 26 01:09:58 2020
@author: canberk
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the tumor dataset from the local CSV file and discard the "id" and
# "Unnamed: 32" columns, which are not used by anything that follows.
data = pd.read_csv(r"C:\Users\canbe\Desktop\data.csv").drop(
    columns=["id", "Unnamed: 32"]
)
# Split the rows by diagnosis label: "M" = malignant, "B" = benign.
M = data[data["diagnosis"] == "M"]
B = data[data["diagnosis"] == "B"]

# Scatter plot of radius_mean against texture_mean, one colour per class.
for group, colour, caption in ((M, "red", "Malignant"), (B, "green", "Benign")):
    plt.scatter(
        group["radius_mean"],
        group["texture_mean"],
        color=colour,
        label=caption,
        alpha=0.3,
    )
plt.xlabel("radius_mean")
plt.ylabel("texture_mean")
plt.legend()
plt.show()
# Encode the target in place: "M" (malignant) -> 1, any other label -> 0.
data["diagnosis"] = (data["diagnosis"] == "M").astype("int64")
y = data["diagnosis"].values
x_data = data.drop(columns=["diagnosis"])

# Min-max normalization, computed separately for every feature column.
# DataFrame.min()/max() are called with an explicit axis rather than through
# np.min()/np.max(): the NumPy wrappers forward axis=None, which pandas >= 2.0
# interprets as "reduce over the whole frame", so every feature would be
# scaled by a single global min/max instead of its own.
# NOTE(review): the statistics are taken over all rows, i.e. before the
# train/test split performed later in this script.
feature_min = x_data.min(axis=0)
feature_range = x_data.max(axis=0) - feature_min
# A constant column has zero range; divide by 1 there so it becomes 0.0
# instead of NaN (0/0).
x = (x_data - feature_min) / feature_range.replace(0, 1)
# Train/test split: hold out 30% of the rows, with a fixed seed.
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)

# Model. The classifier seed is pinned as well: scikit-learn permutes the
# candidate features at each split, so an unseeded tree can produce a
# different score on every run even though the split itself is seeded.
dtc = DecisionTreeClassifier(random_state=1)
dtc.fit(x_train, y_train)

# Show the accuracy on the held-out test rows.
print("Score: ",dtc.score(x_test,y_test))
# An earlier run, made without a fixed classifier seed, printed:
# Score: 0.9181286549707602