diff --git a/Kidney Stone Prediction/Dataset/README.md b/Kidney Stone Prediction/Dataset/README.md new file mode 100644 index 000000000..cbabdc231 --- /dev/null +++ b/Kidney Stone Prediction/Dataset/README.md @@ -0,0 +1 @@ +The dataset which is used here, is collected from Kaggle website. Here is the link of the dataset : https://www.kaggle.com/utkarshxy/kidney-stone-data. I have uploaded the same here, you can access that too! diff --git a/Kidney Stone Prediction/Dataset/kidney_stone_data.csv b/Kidney Stone Prediction/Dataset/kidney_stone_data.csv new file mode 100644 index 000000000..6406fcab1 --- /dev/null +++ b/Kidney Stone Prediction/Dataset/kidney_stone_data.csv @@ -0,0 +1,701 @@ +treatment,stone_size,success +B,large,1 +A,large,1 +A,large,0 +A,large,1 +A,large,1 +B,large,1 +A,small,1 +B,large,1 +B,small,1 +A,large,1 +A,large,1 +B,small,1 +A,large,0 +B,large,0 +B,small,1 +A,large,0 +B,small,1 +B,small,1 +A,small,1 +A,large,1 +A,small,1 +B,large,1 +A,large,1 +A,large,0 +A,small,1 +B,small,1 +A,large,0 +B,small,1 +A,small,1 +B,small,1 +B,small,1 +A,large,0 +B,small,1 +B,small,1 +B,small,1 +A,large,0 +A,large,1 +B,small,1 +A,small,1 +B,small,1 +A,large,0 +A,large,1 +A,large,0 +A,small,1 +B,small,1 +A,large,1 +B,small,0 +A,small,1 +A,small,1 +A,large,0 +B,small,1 +B,small,0 +B,small,1 +B,small,1 +B,large,1 +A,small,1 +A,small,1 +B,small,1 +A,large,1 +B,large,1 +A,large,1 +B,small,1 +B,large,1 +A,small,1 +A,large,1 +B,large,1 +A,small,1 +A,large,1 +B,large,0 +B,small,1 +A,small,1 +B,large,1 +B,small,1 +A,small,1 +B,small,1 +A,large,1 +B,small,1 +A,large,1 +B,small,1 +B,large,1 +B,large,1 +A,large,1 +B,small,1 +B,small,1 +A,small,1 +A,large,1 +B,small,1 +A,large,1 +A,large,0 +B,small,1 +B,large,0 +A,large,1 +B,large,1 +A,small,1 +A,large,1 +A,large,0 +A,small,1 +B,large,0 +B,small,1 +A,small,1 +A,large,1 +A,large,1 +B,small,1 +B,large,1 +A,large,1 +A,large,1 +B,small,1 +B,large,1 +A,large,1 +B,small,1 +B,small,1 +B,small,0 +B,small,1 +B,large,0 +B,small,1 
+B,small,1 +B,small,0 +A,large,0 +B,large,0 +A,small,1 +A,small,1 +A,large,0 +B,small,1 +A,large,1 +A,large,0 +B,small,1 +B,small,1 +A,large,1 +A,small,1 +B,small,1 +B,small,1 +B,small,1 +A,large,1 +A,small,1 +A,large,1 +A,large,0 +A,large,0 +B,small,1 +B,small,1 +A,large,1 +A,large,0 +B,small,1 +B,small,1 +A,large,1 +B,small,1 +A,small,1 +B,large,0 +B,small,1 +A,large,1 +A,small,1 +A,large,1 +A,large,1 +B,large,1 +B,small,1 +B,small,1 +B,large,0 +A,large,1 +B,small,0 +A,large,1 +A,large,0 +B,small,1 +B,small,1 +A,large,0 +A,small,1 +B,large,0 +B,small,1 +A,large,1 +A,large,1 +B,small,0 +A,large,0 +A,large,0 +B,small,1 +B,small,1 +A,large,1 +A,large,1 +A,small,1 +B,small,0 +B,large,1 +A,large,1 +B,small,1 +A,large,1 +A,large,1 +B,small,1 +A,large,1 +A,large,1 +A,large,0 +A,large,0 +A,large,1 +A,large,1 +A,large,1 +B,large,1 +B,small,1 +B,small,1 +A,large,1 +A,large,0 +A,large,1 +A,large,1 +A,large,1 +B,small,1 +A,small,1 +B,small,1 +A,large,1 +B,small,1 +B,small,1 +A,large,1 +B,small,0 +A,large,1 +B,small,1 +A,large,0 +A,large,1 +B,small,1 +B,small,1 +B,small,1 +B,large,0 +A,small,1 +B,small,1 +B,large,1 +A,large,1 +A,small,1 +A,large,1 +B,large,0 +A,large,0 +A,large,0 +B,small,1 +B,small,0 +A,small,1 +A,large,0 +A,small,1 +A,large,1 +B,large,1 +A,large,1 +B,small,1 +B,small,1 +A,large,1 +B,small,1 +A,small,1 +A,small,1 +B,small,1 +B,small,1 +B,large,1 +B,small,1 +B,small,1 +A,large,1 +B,small,1 +B,small,1 +B,small,1 +A,small,1 +A,small,0 +B,small,1 +A,small,1 +B,large,1 +A,large,1 +B,small,1 +A,large,1 +A,large,1 +A,large,0 +B,large,0 +B,small,1 +A,large,1 +B,large,1 +A,large,1 +A,large,1 +A,large,1 +B,small,1 +A,large,1 +A,large,0 +B,small,1 +A,large,1 +A,large,1 +A,large,1 +A,small,1 +B,small,1 +B,large,1 +A,small,1 +A,large,0 +A,large,1 +A,small,1 +B,small,1 +B,small,0 +A,small,1 +A,small,1 +A,large,0 +B,small,1 +B,large,1 +A,large,0 +B,small,1 +B,small,1 +B,small,1 +B,small,1 +B,small,1 +A,large,1 +B,small,1 +A,small,1 +B,small,1 +A,large,1 +B,small,1 
+A,large,0 +A,small,1 +A,large,1 +B,small,1 +A,large,1 +B,small,1 +B,small,1 +A,large,1 +B,small,0 +A,small,1 +B,small,0 +B,large,1 +A,large,0 +B,large,1 +B,large,1 +B,large,1 +B,large,1 +B,small,1 +B,small,1 +B,small,1 +A,large,0 +B,small,1 +A,large,1 +B,large,1 +B,large,1 +B,small,0 +B,small,1 +B,small,1 +A,large,0 +A,large,1 +A,large,1 +A,small,1 +A,large,1 +B,small,1 +A,small,1 +B,small,1 +B,small,1 +B,small,1 +B,large,1 +A,large,1 +B,large,1 +B,small,1 +B,large,0 +A,large,1 +A,small,1 +A,large,1 +B,small,1 +B,small,1 +B,small,1 +A,small,1 +A,large,1 +B,small,1 +B,small,1 +B,small,1 +A,large,1 +A,small,1 +A,large,1 +B,small,0 +B,small,1 +A,small,1 +A,large,1 +A,large,1 +B,small,1 +B,large,1 +A,large,1 +A,small,0 +A,large,1 +B,large,0 +B,small,1 +B,small,1 +B,small,1 +A,small,1 +A,large,1 +B,small,1 +B,small,0 +B,small,1 +A,large,1 +A,large,0 +A,large,1 +A,large,1 +B,small,1 +B,small,1 +A,small,1 +A,small,1 +B,large,1 +A,large,1 +B,small,1 +A,large,0 +A,large,1 +B,small,1 +B,small,1 +B,small,1 +B,small,1 +B,small,1 +B,small,1 +B,large,1 +B,small,1 +A,large,1 +B,small,1 +A,large,1 +B,small,1 +A,small,1 +A,large,1 +A,large,1 +B,large,1 +A,large,1 +A,large,1 +B,small,1 +B,small,1 +A,small,1 +B,small,1 +A,large,1 +B,small,1 +A,large,0 +A,large,0 +A,large,1 +A,large,1 +A,large,0 +A,large,1 +A,large,0 +B,small,1 +A,large,1 +B,small,1 +A,large,0 +A,large,1 +B,small,1 +B,small,1 +A,large,1 +A,large,1 +B,small,1 +A,large,0 +B,small,1 +A,large,1 +B,small,1 +A,small,1 +A,large,1 +B,large,1 +B,large,1 +A,small,1 +B,small,0 +B,small,1 +B,small,0 +A,large,1 +B,small,1 +A,large,0 +B,small,1 +A,large,0 +B,large,0 +A,large,1 +A,large,1 +A,large,1 +A,large,1 +A,large,1 +B,large,1 +B,small,0 +A,large,0 +B,small,1 +B,large,1 +B,large,1 +B,small,1 +B,small,0 +B,small,1 +B,small,1 +B,small,1 +A,large,0 +A,large,0 +A,large,0 +B,large,1 +B,small,1 +A,large,1 +B,small,1 +A,large,0 +A,large,1 +A,large,1 +A,large,1 +A,large,0 +B,small,1 +B,small,1 +A,large,1 +B,small,1 +A,small,1 
+B,small,1 +B,small,0 +A,large,0 +A,large,1 +B,small,0 +A,large,1 +B,large,1 +B,small,1 +B,small,1 +A,small,1 +B,large,0 +B,small,1 +B,small,1 +B,large,0 +A,large,1 +A,large,1 +A,small,1 +A,large,0 +B,small,1 +A,large,1 +B,small,1 +A,large,0 +B,large,1 +B,small,1 +B,small,1 +B,small,1 +A,large,1 +B,small,1 +A,small,1 +A,large,0 +A,small,1 +B,small,1 +A,small,1 +B,small,1 +A,large,1 +B,small,1 +B,small,1 +B,small,1 +A,large,1 +B,small,1 +A,small,0 +A,large,1 +A,large,1 +B,large,1 +A,large,1 +A,small,1 +B,large,0 +B,small,1 +A,small,1 +B,small,1 +A,large,1 +A,small,1 +A,large,1 +A,small,1 +A,large,1 +A,small,1 +A,large,0 +A,small,1 +B,small,1 +B,small,1 +A,small,1 +B,small,1 +A,large,1 +B,small,1 +A,large,1 +B,small,0 +A,large,0 +A,large,1 +A,small,1 +A,large,1 +B,small,1 +A,small,0 +A,small,1 +B,large,1 +A,large,1 +B,small,1 +B,small,1 +A,large,0 +A,large,1 +A,large,1 +B,large,0 +B,small,1 +A,large,1 +A,large,1 +B,large,0 +A,large,1 +B,small,1 +B,large,0 +A,small,1 +B,small,1 +B,small,1 +B,small,1 +A,large,1 +B,large,1 +B,small,1 +A,large,1 +A,large,1 +A,large,1 +A,large,1 +A,large,1 +B,small,0 +B,small,1 +B,small,1 +A,small,1 +A,large,1 +A,small,1 +A,large,1 +B,small,1 +A,large,1 +B,small,1 +B,small,1 +A,large,1 +A,large,1 +B,small,1 +B,small,1 +A,large,0 +A,small,0 +B,small,1 +A,large,0 +A,small,1 +A,large,0 +A,large,1 +A,large,1 +B,large,1 +B,small,0 +A,large,1 +A,large,1 +B,small,1 +B,small,1 +B,small,1 +B,small,1 +B,small,0 +B,small,1 +A,large,1 +B,small,1 +A,large,1 +B,small,1 +A,large,1 +B,small,1 +B,large,1 +A,small,1 +A,large,1 +A,large,1 +A,large,0 +B,large,0 +B,small,1 +B,large,1 +B,large,0 +B,large,1 +A,large,0 +A,small,0 +A,large,1 +A,large,1 +B,small,0 +B,small,0 +B,large,1 +A,large,1 +B,large,0 +A,large,0 +A,large,1 +B,large,1 +B,small,1 +A,large,0 +B,small,1 +B,small,1 +B,small,1 +A,small,1 +B,small,1 +B,small,1 +B,small,1 +A,large,0 +B,small,1 +B,small,1 +B,small,1 +A,large,1 +B,small,0 +B,small,1 +A,large,1 +A,large,1 +B,small,1 +B,small,1 
+B,small,1 +B,small,1 +B,small,0 +A,large,1 +A,large,1 +A,large,0 +B,large,1 +A,large,1 +B,small,0 +B,small,0 +B,small,1 +A,large,1 +A,large,1 +B,small,1 +B,small,1 +B,small,0 +A,large,1 +A,large,0 +A,large,1 +A,large,1 +A,small,1 +B,small,0 +B,small,1 +B,small,1 +A,large,1 +B,small,1 +B,small,0 +A,large,1 +A,large,1 +B,small,1 +B,small,0 +B,small,1 +B,small,1 +A,small,1 +A,large,1 +A,large,1 +B,large,0 +B,small,0 +B,small,1 +B,small,1 +A,large,1 +A,small,1 diff --git a/Kidney Stone Prediction/Images/kid1.jpg b/Kidney Stone Prediction/Images/kid1.jpg new file mode 100644 index 000000000..2769cdf08 Binary files /dev/null and b/Kidney Stone Prediction/Images/kid1.jpg differ diff --git a/Kidney Stone Prediction/Images/kid2.png b/Kidney Stone Prediction/Images/kid2.png new file mode 100644 index 000000000..1a10419ba Binary files /dev/null and b/Kidney Stone Prediction/Images/kid2.png differ diff --git a/Kidney Stone Prediction/Model/Kidney_Stone_Prediction.ipynb b/Kidney Stone Prediction/Model/Kidney_Stone_Prediction.ipynb new file mode 100644 index 000000000..b6d31f9b1 --- /dev/null +++ b/Kidney Stone Prediction/Model/Kidney_Stone_Prediction.ipynb @@ -0,0 +1,1610 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Kidney Stone Prediction\n", + "\n", + "![](https://wallpaperaccess.com/full/5793661.jpg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Contents\n", + "1. Abstract\n", + "2. Dataset\n", + "3. Goal\n", + "4. Importing libraries and Dataset\n", + "5. Data Cleaning\n", + "6. Data Visualization\n", + "7. Prediction Models\n", + " - KNN Algorithm\n", + " - Logistic Regression\n", + " - Random Forest Classifier\n", + " - Decision Tree Classifier\n", + " - Support Vector Machine Classifier\n", + " - AdaBoost Classifier\n", + " - Gradient Boosting Classifier\n", + " - Gaussian Naive Bayes Classifier\n", + " - MLP Classifier\n", + "8. Model comparison\n", + "9. 
Conclusion\n", + "\n", + "********************" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Abstract\n", + "A kidney stone is a small, hard deposit that forms in the kidneys and is often painful when passed.\n", + "\n", + "Kidney stones are hard deposits of minerals and acid salts that stick together in concentrated urine. They can be painful when passing through the urinary tract, but usually don't cause permanent damage.\n", + "\n", + "The most common symptom is severe pain, usually in the side of the abdomen, that's often associated with nausea.\n", + "Treatment includes pain relievers and drinking lots of water to help pass the stone. Medical procedures may be required to remove or break up larger stones.\n", + "\n", + "The most common symptom is severe pain, usually in the side of the abdomen, that's often associated with nausea.\n", + "\n", + "Treatment includes pain relievers and drinking lots of water to help pass the stone. Medical procedures may be required to remove or break up larger stones.\n", + "\n", + "\n", + "### Dataset\n", + "The dataset used here was collected from the Kaggle website. 
Here is the link to the dataset: https://www.kaggle.com/utkarshxy/kidney-stone-data.\n", + "\n", + "### Goal\n", + "The goal of this project is to create a prediction model which will predict the success rate of a kidney stone operation based on the stone's size and the type of treatment.\n", + "************************************************************" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing all the required libraries and Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", + "execution": { + "iopub.execute_input": "2021-02-27T08:07:10.032292Z", + "iopub.status.busy": "2021-02-27T08:07:10.031675Z", + "iopub.status.idle": "2021-02-27T08:07:11.397239Z", + "shell.execute_reply": "2021-02-27T08:07:11.396432Z" + }, + "papermill": { + "duration": 1.379825, + "end_time": "2021-02-27T08:07:11.397439", + "exception": false, + "start_time": "2021-02-27T08:07:10.017614", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "#Import Library Files\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.metrics import roc_curve\n", + "from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier\n", + "from sklearn.neural_network import MLPClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.418498Z", + "iopub.status.busy": "2021-02-27T08:07:11.417905Z", + "iopub.status.idle": "2021-02-27T08:07:11.470784Z", + "shell.execute_reply": "2021-02-27T08:07:11.471194Z" + }, + "papermill": { 
+ "duration": 0.065289, + "end_time": "2021-02-27T08:07:11.471372", + "exception": false, + "start_time": "2021-02-27T08:07:11.406083", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
treatmentstone_sizesuccess
0Blarge1
1Alarge1
2Alarge0
3Alarge1
4Alarge1
\n", + "
" + ], + "text/plain": [ + " treatment stone_size success\n", + "0 B large 1\n", + "1 A large 1\n", + "2 A large 0\n", + "3 A large 1\n", + "4 A large 1" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Read CSV File\n", + "\n", + "data = pd.read_csv('kidney_stone_data.csv')\n", + "\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.504485Z", + "iopub.status.busy": "2021-02-27T08:07:11.503835Z", + "iopub.status.idle": "2021-02-27T08:07:11.508313Z", + "shell.execute_reply": "2021-02-27T08:07:11.507816Z" + }, + "papermill": { + "duration": 0.028853, + "end_time": "2021-02-27T08:07:11.508460", + "exception": false, + "start_time": "2021-02-27T08:07:11.479607", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 700 entries, 0 to 699\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 treatment 700 non-null object\n", + " 1 stone_size 700 non-null object\n", + " 2 success 700 non-null int64 \n", + "dtypes: int64(1), object(2)\n", + "memory usage: 16.5+ KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.008718, + "end_time": "2021-02-27T08:07:11.525762", + "exception": false, + "start_time": "2021-02-27T08:07:11.517044", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Cleaning Data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.545834Z", + "iopub.status.busy": "2021-02-27T08:07:11.544975Z", + "iopub.status.idle": "2021-02-27T08:07:11.552100Z", + "shell.execute_reply": "2021-02-27T08:07:11.552503Z" + }, + "papermill": { + "duration": 
0.018632, + "end_time": "2021-02-27T08:07:11.552699", + "exception": false, + "start_time": "2021-02-27T08:07:11.534067", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "treatment 0\n", + "stone_size 0\n", + "success 0\n", + "dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Check null value\n", + "data.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.573186Z", + "iopub.status.busy": "2021-02-27T08:07:11.572313Z", + "iopub.status.idle": "2021-02-27T08:07:11.578950Z", + "shell.execute_reply": "2021-02-27T08:07:11.578265Z" + }, + "papermill": { + "duration": 0.017914, + "end_time": "2021-02-27T08:07:11.579098", + "exception": false, + "start_time": "2021-02-27T08:07:11.561184", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(700, 3)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.607827Z", + "iopub.status.busy": "2021-02-27T08:07:11.604477Z", + "iopub.status.idle": "2021-02-27T08:07:11.740062Z", + "shell.execute_reply": "2021-02-27T08:07:11.739453Z" + }, + "papermill": { + "duration": 0.152112, + "end_time": "2021-02-27T08:07:11.740196", + "exception": false, + "start_time": "2021-02-27T08:07:11.588084", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPWklEQVR4nO3df6zdd13H8edrLW7ID2nTu1n6g1ZtjJ3AkOskEBUZsvJDujCHxUwbaVI1EyExSosR/JEqBDRMZH9UGRT5MRtgrmLimMW5GJHSwmRrx1xD53bT0paBDCJOW97+cb/9cNre252t+95z2/t8JDfnnM/5fr/3XTL67Pece743VYUkSQAXjHoASdLsYRQkSY1RkCQ1RkGS1BgFSVIzf9QDnI1FixbVihUrRj2GJJ1T9uzZ89WqGpvquXM6CitWrGD37t2jHkOSzilJ/nO653z5SJLUGAVJUmMUJEmNUZAkNUZBktQYBUlSYxQkSY1RkCQ1RkGS1JzTn2iWzmcP/OGzRz2CZqHlb72r1+N7piBJaoyCJKkxCpKkxihIkhqjIElqjIIkqTEKkqTGKEiSGqMgSWqMgiSpMQqSpMYoSJIaoyBJaoyCJKkxCpKkxihIkhqjIElqjIIkqek1CknuT3JXkjuT7O7WFia5Lcl93e2Cge03J9mf5N4kV/Y5myTpdDNxpvAzVXVZVY13jzcBO6tqFbCze0yS1cA64FJgDXBDknkzMJ8kqTOKl4/WAtu6+9uAqwbWb6qqR6rqALAfuHzmx5OkuavvKBTwqSR7kmzs1i6pqkMA3e3F3foS4MGBfSe6NUnSDJnf8/FfVFUHk1wM3JbkS2fYNlOs1WkbTcZlI8Dy5cufmCklSUDPZwpVdbC7PQLczOTLQYeTLAbobo90m08AywZ2XwocnOKYW6tqvKrGx8bG+hxfkuac3qKQ5ClJnnbiPvAy4G5gB7C+22w9cEt3fwewLsmFSVYCq4Bdfc0nSTpdny8fXQLcnOTE9/lIVf1Dks8B25NsAB4ArgGoqr1JtgP7gGPAdVV1vMf5JEmn6C0KVfVl4LlTrD8EXDHNPluALX3NJEk6Mz/RLElqjIIkqTEKkqTGKEiSGqMgSWqMgiSpMQqSpMYoSJIaoyBJaoyCJKkxCpKkxihIkhqjIElqjIIkqTEKkqTGKEiSGqMgSWqMgiSpMQqSpMYoSJIaoyBJaoyCJKkxCpKkxihIkhqjIElqjIIkqTEKkqTGKEiSmt6jkGReki8k+WT3eGGS25Lc190uGNh2c5L9Se5NcmXfs0mSTjYTZwpvBO4ZeLwJ2FlVq4Cd3WOSrAbWAZcCa4AbksybgfkkSZ1eo5BkKfBK4K8GltcC27r724CrBtZvqqpHquoAsB+4vM/5JEkn6/tM4d3A7wDfGVi7pKoOAXS3F3frS4AHB7ab6NZOkmRjkt1Jdh89erSXoSVpruotCkleBRypqj3D7jLFWp22ULW1qsaranxsbOysZpQknWx+j8d+EfDqJK8ALgKenuRDwOEki6vqUJLFwJFu+wlg2cD+S4GDPc4nSTpFb2cKVbW5qpZW1Qom30D+dFVdC+wA1nebrQdu6e7vANYluTDJSmAVsKuv+SRJp+vzTGE6bwe2J9kAPABcA1BVe5NsB/YBx4Drqur4COaTpDlrRqJQVbcDt3f3HwKumGa7LcCWmZhJknQ6P9EsSWqMgiSpMQqSpMYoSJIaoyBJaoyCJKkxCpKkxihIkhqjIElqjIIkqTEKkqTGKEiSGqMgSWqMgiSpMQqSpMYoSJIaoyBJaoyCJKkxCpKkxihIkhqjIElqjIIkqRkqCkl2DrMmSTq3zT/Tk0kuAr4XWJRkAZDuqacDz+x5NknSDDtjFIBfBd7EZAD28N0oPAy8t7+xJEmjcMYoVNX1wPVJ3lBV75mhmSRJI/JoZwoAVNV7krwQWDG4T1V9sKe5JEkjMFQUkvw18IPAncDxbrkAoyBJ55GhogCMA6urqoY9cPc
m9R3Ahd33+VhVvS3JQuBvmDzruB94bVV9vdtnM7CByfD8ZlXdOuz3kySdvWE/p3A38P2P8diPAC+pqucClwFrkrwA2ATsrKpVwM7uMUlWA+uAS4E1wA1J5j3G7ylJOgvDniksAvYl2cXkX/YAVNWrp9uhO6v4VvfwSd1XAWuBF3fr24DbgTd36zdV1SPAgST7gcuBzww5oyTpLA0bhd9/PAfv/qW/B/gh4L1V9dkkl1TVIYCqOpTk4m7zJcC/Dew+0a2desyNwEaA5cuXP56xJEnTGPanj/758Ry8qo4DlyV5BnBzkh89w+aZYu209zCqaiuwFWB8fHzo9zgkSY9u2MtcfDPJw93X/yQ5nuThYb9JVf0Xky8TrQEOJ1ncHXcxcKTbbAJYNrDbUuDgsN9DknT2hopCVT2tqp7efV0EXA38xZn2STLWnSGQ5MnAS4EvATuA9d1m64Fbuvs7gHVJLkyyElgF7HqMfx5J0lkY9j2Fk1TV3ybZ9CibLQa2de8rXABsr6pPJvkMsD3JBuAB4JrumHuTbAf2AceA67qXnyRJM2TYD6+9ZuDhBUx+buGMr+dX1ReB502x/hBwxTT7bAG2DDOTJOmJN+yZws8N3D/G5IfO1j7h00iSRmrYnz76lb4HkSSN3rA/fbQ0yc1JjiQ5nOTjSZb2PZwkaWYNe5mL9zP500HPZPIDZX/XrUmSziPDRmGsqt5fVce6rw8AYz3OJUkagWGj8NUk1yaZ131dCzzU52CSpJk3bBReD7wW+ApwCPh5wDefJek8M+yPpP4RsH7g9x4sBN7FZCwkSeeJYc8UnnMiCABV9TWm+GCaJOncNmwULkiy4MSD7kzhcV0iQ5I0ew37F/ufAv+a5GNMXt7itXg5Ckk67wz7ieYPJtkNvITJ33vwmqra1+tkkqQZN/RLQF0EDIEknceGfU9BkjQHGAVJUmMUJEmNUZAkNUZBktQYBUlSYxQkSY1RkCQ1RkGS1BgFSVJjFCRJjVGQJDVGQZLUGAVJUmMUJElNb1FIsizJPyW5J8neJG/s1hcmuS3Jfd3t4K/53Jxkf5J7k1zZ12ySpKn1eaZwDPitqvoR4AXAdUlWA5uAnVW1CtjZPaZ7bh1wKbAGuCHJvB7nkySdorcoVNWhqvp8d/+bwD3AEmAtsK3bbBtwVXd/LXBTVT1SVQeA/cDlfc0nSTrdjLynkGQF8Dzgs8AlVXUIJsMBXNxttgR4cGC3iW7t1GNtTLI7ye6jR4/2OrckzTW9RyHJU4GPA2+qqofPtOkUa3XaQtXWqhqvqvGxsbEnakxJEj1HIcmTmAzCh6vqE93y4SSLu+cXA0e69Qlg2cDuS4GDfc4nSTpZnz99FOB9wD1V9WcDT+0A1nf31wO3DKyvS3JhkpXAKmBXX/NJkk43v8djvwj4JeCuJHd2a28B3g5sT7IBeAC4BqCq9ibZDuxj8ieXrquq4z3OJ0k6RW9RqKp/Yer3CQCumGafLcCWvmaSJJ2Zn2iWJDVGQZLUGAVJUmMUJEmNUZAkNUZBktQYBUlSYxQkSY1RkCQ1RkGS1BgFSVJjFCRJjVGQJDVGQZLUGAVJUmMUJEmNUZAkNX3+Os5zwvN/+4OjHkGz0J53/vKoR5BGwjMFSVJjFCRJjVGQJDVGQZLUGAVJUmMUJEmNUZAkNUZBktQYBUlSYxQkSU1vUUhyY5IjSe4eWFuY5LYk93W3Cwae25xkf5J7k1zZ11ySpOn1eabwAWDNKWubgJ1VtQrY2T0myWpgHXBpt88NSeb1OJskaQq9RaGq7gC+dsryWmBbd38bcNXA+k1V9UhVHQD2A5f3NZskaWoz/Z7CJVV1CKC7vbhbXwI8OLDdRLd2miQbk+xOsvvo0aO9DitJc81seaM5U6zVVBtW1daqGq+q8bGxsZ7HkqS5ZaajcDjJYoDu9ki3PgEsG9huKXBwhmeTpDlvpqOwA1jf3V8P3DKwvi7JhUlWAquAXTM8myTNeb395rUkHwV
eDCxKMgG8DXg7sD3JBuAB4BqAqtqbZDuwDzgGXFdVx/uaTZI0td6iUFWvm+apK6bZfguwpa95JEmPbra80SxJmgWMgiSpMQqSpMYoSJIaoyBJaoyCJKkxCpKkxihIkhqjIElqjIIkqTEKkqTGKEiSGqMgSWqMgiSpMQqSpMYoSJIaoyBJaoyCJKkxCpKkxihIkhqjIElqjIIkqTEKkqTGKEiSGqMgSWqMgiSpMQqSpMYoSJKaWReFJGuS3Jtkf5JNo55HkuaSWRWFJPOA9wIvB1YDr0uyerRTSdLcMauiAFwO7K+qL1fV/wI3AWtHPJMkzRnzRz3AKZYADw48ngB+YnCDJBuBjd3DbyW5d4ZmmwsWAV8d9RCzQd61ftQj6GT+t3nC2/JEHOVZ0z0x26Iw1Z+2TnpQtRXYOjPjzC1JdlfV+KjnkE7lf5szZ7a9fDQBLBt4vBQ4OKJZJGnOmW1R+BywKsnKJN8DrAN2jHgmSZozZtXLR1V1LMlvALcC84Abq2rviMeaS3xZTrOV/23OkFTVo28lSZoTZtvLR5KkETIKkqTGKMhLi2jWSnJjkiNJ7h71LHOFUZjjvLSIZrkPAGtGPcRcYhTkpUU0a1XVHcDXRj3HXGIUNNWlRZaMaBZJI2YU9KiXFpE0dxgFeWkRSY1RkJcWkdQYhTmuqo4BJy4tcg+w3UuLaLZI8lHgM8APJ5lIsmHUM53vvMyFJKnxTEGS1BgFSVJjFCRJjVGQJDVGQZLUGAVJUmMUJEmNUZAGJHlKkr9P8u9J7k7yC0nuT7Koe348ye3d/acmeX+Su5J8McnV3fqaJJ/vjrFz4Lg3Jvlcki8kWdutX5pkV5I7u2OsmmqGEf3PoTlo/qgHkGaZNcDBqnolQJLvA94xzba/B3yjqp7dbbsgyRjwl8BPVdWBJAu7bX8X+HRVvT7JM4BdSf4R+DXg+qr6cHeZkXnAK6aYQZoRnilIJ7sLeGmSdyT5yar6xhm2fSmTv6AIgKr6OvAC4I6qOtCtnfhdAC8DNiW5E7gduAhYzuQlHN6S5M3As6rq249xBukJZRSkAVX1H8DzmfyL+U+SvBU4xnf/v3LRwObh9MuMT7V2Yv3qqrqs+1peVfdU1UeAVwPfBm5N8pJpZpBmhFGQBiR5JvDfVfUh4F3AjwH3M/mXNMDVA5t/ismLCZ7YdwGT//L/6SQru7UTLx/dCrwhSbr153W3PwB8uar+nMmr0z5nmhmkGeEF8aQBSa4E3gl8B/g/4NeBJwPvAw4DnwXGq+rFSZ7K5MtHzweOA39QVZ9I8nLgj5n8R9eRqvrZJE8G3g28kMmzhvur6lVJNgPXdt/rK8AvAj9+6gxVtXsm/vySUZAkNb58JElqjIIkqTEKkqTGKEiSGqMgSWqMgiSpMQqSpOb/AdvY3FRZaU+PAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Check sucess rate\n", + "\n", + "sns.countplot(x=kidney_stone['success'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "****************************\n", + "## Prediction Models\n", + "A classification model tries to draw some conclusion from the input values given for training. It will predict the class labels/categories for the new data.\n", + "\n", + "Here we are going to prepare several Classification machine learning models based on those we will do a comparative analysis that which model is better among them.\n", + "\n", + "We are using nine different classification algorithms -\n", + "* **K-Nearest Neighbours Algorithm** : K-Nearest Neighbour is one of the simplest Machine Learning algorithms based on Supervised Learning technique. K-NN algorithm assumes the similarity between the new case/data and available cases and put the new case into the category that is most similar to the available categories. K-NN algorithm stores all the available data and classifies a new data point based on the similarity. This means when new data appears then it can be easily classified into a well suite category by using K- NN algorithm.\n", + "\n", + "\n", + "* **Support Vector Machine Algorithm** : Support Vector Machine or SVM is one of the most popular Supervised Learning algorithms, which is used for Classification as well as Regression problems. However, primarily, it is used for Classification problems in Machine Learning. The goal of the SVM algorithm is to create the best line or decision boundary that can segregate n-dimensional space into classes so that we can easily put the new data point in the correct category in the future. 
This best decision boundary is called a hyperplane.\n", + "\n", + "\n", + "* **Random Forest Classifier** : Random Forest is a popular machine learning algorithm that belongs to the supervised learning technique. It can be used for both Classification and Regression problems in ML. It is based on the concept of ensemble learning, which is a process of combining multiple classifiers to solve a complex problem and to improve the performance of the model.\n", + "\n", + "\n", + "* **Logistic Regression** : Logistic regression is a statistical model that in its basic form uses a logistic function to model a binary dependent variable, although many more complex extensions exist. In regression analysis, logistic regression (or logit regression) is estimating the parameters of a logistic model (a form of binary regression).\n", + "\n", + "\n", + "* **Naive Bayes Classification Algorithm** : Naive Bayes classifiers are a family of simple \"probabilistic classifiers\" based on applying Bayes' theorem with strong independence assumptions between the features. They are among the simplest Bayesian network models, but coupled with kernel density estimation, they can achieve higher accuracy levels.\n", + "\n", + "\n", + "* **Decision Tree Classifier** : Decision Tree is a Supervised learning technique that can be used for both classification and Regression problems, but mostly it is preferred for solving Classification problems. 
It is a tree-structured classifier, where internal nodes represent the features of a dataset, branches represent the decision rules and each leaf node represents the outcome.\n", + "\n", + "\n", + "* **Gradient Boosting Algorithm** : Gradient boosting is a machine learning technique for regression, classification and other tasks, which produces a prediction model in the form of an ensemble of weak prediction models, typically decision trees.\n", + "\n", + "\n", + "* **AdaBoosting Algorithm** : AdaBoost, short for Adaptive Boosting, is a statistical classification meta-algorithm formulated by Yoav Freund and Robert Schapire, who won the 2003 Gödel Prize for their work. It can be used in conjunction with many other types of learning algorithms to improve performance.\n", + "\n", + "\n", + "* **Artificial Neural Network** : Artificial neural networks, usually simply called neural networks, are computing systems vaguely inspired by the biological neural networks that constitute animal brains. An ANN is based on a collection of connected units or nodes called artificial neurons, which loosely model the neurons in a biological brain.\n", + "\n", + "Let's quickly get into the algorithms!" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Splitting the dataset\n", + "* creating the training and testing datasets by splitting at a 90:10 ratio\n", + "* training sets are x_train and y_train\n", + "* testing sets are x_test and y_test" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.789096Z", + "iopub.status.busy": "2021-02-27T08:07:11.788157Z", + "iopub.status.idle": "2021-02-27T08:07:11.798610Z", + "shell.execute_reply": "2021-02-27T08:07:11.798080Z" + }, + "papermill": { + "duration": 0.02923, + "end_time": "2021-02-27T08:07:11.798753", + "exception": false, + "start_time": "2021-02-27T08:07:11.769523", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(630, 4)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x= data[['treatment','stone_size']]\n", + "y = data['success']\n", + "\n", + "x_dummies = pd.get_dummies(x) #Change String to Numerical \n", + "\n", + "x_train, x_test, y_train, y_test = train_test_split(x_dummies,y, test_size=0.10, random_state=0)\n", + "x_train.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Logistic Regression\n", + " Logistic regression is a statistical model that in its basic form uses a logistic function to model a binary dependent variable, although many more complex extensions exist. In regression analysis, logistic regression (or logit regression) is estimating the parameters of a logistic model (a form of binary regression)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.826623Z", + "iopub.status.busy": "2021-02-27T08:07:11.825697Z", + "iopub.status.idle": "2021-02-27T08:07:11.842287Z", + "shell.execute_reply": "2021-02-27T08:07:11.841677Z" + }, + "papermill": { + "duration": 0.033378, + "end_time": "2021-02-27T08:07:11.842422", + "exception": false, + "start_time": "2021-02-27T08:07:11.809044", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Create a model\n", + "\n", + "logreg = LogisticRegression()\n", + "\n", + "logreg.fit(x_train,y_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.871512Z", + "iopub.status.busy": "2021-02-27T08:07:11.870505Z", + "iopub.status.idle": "2021-02-27T08:07:11.875217Z", + "shell.execute_reply": "2021-02-27T08:07:11.874742Z" + }, + "papermill": { + "duration": 0.022217, + "end_time": "2021-02-27T08:07:11.875348", + "exception": false, + "start_time": "2021-02-27T08:07:11.853131", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = logreg.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.907719Z", + "iopub.status.busy": "2021-02-27T08:07:11.906743Z", 
+ "iopub.status.idle": "2021-02-27T08:07:11.911100Z", + "shell.execute_reply": "2021-02-27T08:07:11.911489Z" + }, + "papermill": { + "duration": 0.024309, + "end_time": "2021-02-27T08:07:11.911675", + "exception": false, + "start_time": "2021-02-27T08:07:11.887366", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + " [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Create Confusion Matrix\n", + "\n", + "from sklearn import metrics\n", + "\n", + "cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:11.983425Z", + "iopub.status.busy": "2021-02-27T08:07:11.966288Z", + "iopub.status.idle": "2021-02-27T08:07:12.111234Z", + "shell.execute_reply": "2021-02-27T08:07:12.110749Z" + }, + "papermill": { + "duration": 0.188508, + "end_time": "2021-02-27T08:07:12.111377", + "exception": false, + "start_time": "2021-02-27T08:07:11.922869", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAVoAAAD4CAYAAACt8i4nAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAQe0lEQVR4nO3dfZBddX3H8fd3l6QqLYJAICSpwSG10DqaTkhRtOVBHsSH0JHgUzW1ma7jIIZqFUSrg9URH0AB0XFHkEyVh9SWSUSrYISio4UESAUSIfJQ2GRLpgpTBZXs3W//2Etck2Xv3XB/99w9eb8yZ/bec+/93W/IzofffM/vnBOZiSSpnL6qC5CkujNoJakwg1aSCjNoJakwg1aSCtur+BfMnOOyBu3i1QcvrLoE9aDVD10Xz3SM7f97f9uZM+OAFzzj72uHM1pJKqz4jFaSumq0UXUFuzBoJdVLY6TqCnZh0EqqlczRqkvYhUErqV5GDVpJKssZrSQV5sEwSSrMGa0klZWuOpCkwjwYJkmF2TqQpMI8GCZJhTmjlaTCPBgmSYV5MEySysq0RytJZdmjlaTCbB1IUmHOaCWpsMb2qivYhUErqV5sHUhSYbYOJKkwZ7SSVJhBK0llpQfDJKkwe7SSVFgHWwcR8SDwC6ABjGTmooh4HnANMB94EDg9Mx+dbJy+jlUkSb0gR9vf2nNsZr4kMxc1n58DrM3MBcDa5vNJGbSS6mV0tP1t9ywBVjYfrwRObfUBg1ZSvUxhRhsRAxGxftw2sPNowPURcdu41w7KzGGA5s9ZrUqyRyupXkbav/B3Zg4Cg5O85ejM3BoRs4AbIuInu1OSM1pJ9dLBHm1mbm3+3AZcCywGHomI2QDNn9tajWPQSqqXDvVoI2LviPiDpx4DJwJ3AWuAZc23LQNWtyrJ1oGkeuncOtqDgGsjAsay8srM/HZErANWRcRy4CFgaauBDFpJ9dKhdbSZeT/w4gn2/ww4fipjGbSS6sUzwySpsCmsOugWg1ZSvWRWXcEuDFpJ9eJlEiWpMINWkgrzYJgkFdZoVF3BLgxaSfVi60CSCjNoJakwe7SSVFaOuo5WksqydSBJhbnqQJIKc0YrSYX1YNB6h4UuOenEY7j7rpv5ycYf8P73nVF1OarImZ9ewcrbv8rFN1y6Y9/fnPt2Lv3eF7noO5fwgcEPsvc+e1dYYQ1ktr91iUHbBX19fVx80cd5zWv/mhe9+Fje8IZTOfzwBVWXpQqs/Zfvct7bPvI7+zZ8fwNnnnAGK046ky0PbOH1Z7S8YL8mU/5241PWsnUQEX/M2H3M5zB2692twJrM3FS4ttpYfORC7rvvQR544CEAVq1azeteexKbNm2uuDJ128Zb72bW3N+9O/WG79+x4/G9t9/Dy045uttl1UsPLu+adEYbEWcDVwMB3Aqsaz6+KiLOKV9ePRwy52AeHtq64/nQlmEOOeTgCitSrzr+DSdw203rqy5jems02t+6pNWMdjnwJ5m5ffzOiLgQuBs4f6IPRcQAMAAQ/c+lr2/P7jk1b+72O7IHL06sai191+mMjjT4j2tvqrqUaS2n4cGwUeCQCfbPbr42ocwczMxFmbloTw9ZgC1Dw8yb+9v/jHPnzGZ4+JEKK1KvOfa041h0/GIuePdnqi5l+hvN9rcuaTWjPQtYGxGbgYeb+/4QOAx4V8nC6mTd+g0cdtihzJ8/jy1b/ofTT1/CW9/mygONWfiXf8br33ka5y49hyd//Zuqy5n+ptu1Dpr3MP8jYDFjB8MCGALWZWbvnX7RoxqNBivO+hDf+uaV9Pf1ccXKa9i48d6qy1IF3nvJ+/jTl76Iffbbh8tuuYKrLvwap52xlBkzZ3De1z4GwL133MMXz720xUh6Wj14MCxK9wr3mjmn9/7WqtyrD15YdQnqQasfum7XAxpT9PiH39h25uz90auf8fe1wzPDJNXLdGsdSNK004OtA4NWUq3
04vIug1ZSvTijlaTCejBovaiMpHrp8Cm4EdEfEXdExHXN54dGxC0RsTkiromIma3GMGgl1UqOZttbm1YA4y+i9Ungs5m5AHiUsUsVTMqglVQvHTwFNyLmAq8Gvtx8HsBxwNebb1kJnNpqHINWUr1M4Xq0ETEQEevHbQM7jfY54P389tou+wOPZeZI8/kQY2fNTsqDYZLqZQoHwzJzEBic6LWIeA2wLTNvi4hjnto90TCtvseglVQvnVt1cDTwuog4BXgWsA9jM9x9I2Kv5qx2LmM3Q5iUrQNJtZKN0ba3ScfJ/EBmzs3M+cAbge9l5luAG4HTmm9bBqxuVZNBK6leyl+P9mzgPRHxU8Z6tpe1+oCtA0m1MoVlW+2PmXkTcFPz8f2MXTq2bQatpHrpwTPDDFpJ9dJ715QxaCXVS470XtIatJLqpfdy1qCVVC8lDoY9UwatpHpxRitJZTmjlaTSnNFKUlk7rqvVQwxaSbXSg3cbN2gl1YxBK0llOaOVpMIMWkkqLBsT3QShWgatpFpxRitJheWoM1pJKsoZrSQVlumMVpKKckYrSYWNuupAksryYJgkFWbQSlJh2XuXozVoJdWLM1pJKszlXZJUWMNVB5JUljNaSSqsF3u0fVUXIEmdlNn+NpmIeFZE3BoR/xURd0fEec39h0bELRGxOSKuiYiZrWoyaCXVSo5G21sLvwGOy8wXAy8BTo6Io4BPAp/NzAXAo8DyVgMZtJJqpTHa1/Y2mRzzy+bTGc0tgeOArzf3rwRObVWTQSupVqbSOoiIgYhYP24bGD9WRPRHxAZgG3ADcB/wWOaOm5oPAXNa1eTBMEm1MjqFVQeZOQgMTvJ6A3hJROwLXAscPtHbWn2PQSupVkos78rMxyLiJuAoYN+I2Ks5q50LbG31eVsHkmqlg6sODmzOZImIZwOvBDYBNwKnNd+2DFjdqiZntKrE12+/uOoSVFNTaR20MBtYGRH9jE1KV2XmdRGxEbg6Ij4G3AFc1mogg1ZSrbRaTdCuzPwxsHCC/fcDi6cylkErqVZ68CqJBq2keulg66BjDFpJteJFZSSpsB68Ca5BK6leEme0klTUiK0DSSrLGa0kFWaPVpIKc0YrSYU5o5WkwhrOaCWprB68N6NBK6leRp3RSlJZXlRGkgrzYJgkFTYatg4kqahG1QVMwKCVVCuuOpCkwlx1IEmFuepAkgqzdSBJhbm8S5IKazijlaSynNFKUmEGrSQV1oO3DDNoJdWLM1pJKsxTcCWpsF5cR9tXdQGS1EmjU9gmExHzIuLGiNgUEXdHxIrm/udFxA0Rsbn5c79WNRm0kmqlU0ELjADvzczDgaOAMyLiCOAcYG1mLgDWNp9PyqCVVCs5hW3ScTKHM/P25uNfAJuAOcASYGXzbSuBU1vVZNBKqpXRaH+LiIGIWD9uG5hozIiYDywEbgEOysxhGAtjYFarmjwYJqlWprLqIDMHgcHJ3hMRvw/8K3BWZv5f7MYdHAxaSbUy2sELJUbEDMZC9muZ+W/N3Y9ExOzMHI6I2cC2VuPYOpBUKx1cdRDAZcCmzLxw3EtrgGXNx8uA1a1qckYrqVY6eOHvo4G3AndGxIbmvnOB84FVEbEceAhY2mogg1ZSrXTqFNzM/AE87X1xjp/KWAatpFoZid67mY1BK6lWei9mDVpJNePVuySpsE4u7+oUg1ZSrfRezBq0kmrG1oEkFdbowTmtQSupVpzRSlJh6YxWkspyRrsHO+nEY7jwwo/S39fH5V+5ik99+tKqS1JFTnz9MvZ+znPo6+ujv7+fVZdfzE/uvY+PfvoSfvPkdvr7+/nHfziDFx3xwqpLnZZc3rWH6uvr4+KLPs7Jp7yJoaFh/vNH3+Ib113Ppk2bqy5NFbn8kvPZb9/n7nh+wRcu451/+xZe8dIjufmHt3LBFy7jis9/qsIKp6/ei1kvk9gVi49cyH33PcgDDzzE9u3bWbVqNa977UlVl6UeEhH88vEnAPjl408
w64D9K65o+hoh2966xRltFxwy52AeHtq64/nQlmEWH7mwwopUpYhg4O8/SESwdMmrWLrkFM5e8Q7e8Z4P8ZlLv0yOJl/90gVVlzlt1epgWES8PTO/8jSvDQADANH/XPr69t7dr6mFiW59kdl7vwzqjn/+4gXMOnB/fvboY/zdWedy6PPncf2NP+DsMwc44diX8+21N/PhT3yOL1/0iapLnZZ68WDYM2kdnPd0L2TmYGYuysxFe3rIAmwZGmbe3EN2PJ87ZzbDw49UWJGqNOvAsbbA/vvty/F/8TLu3HgPa/79u7zymKMBOOm4V3DnxnuqLHFayyn86ZZJgzYifvw0253AQV2qcdpbt34Dhx12KPPnz2PGjBmcfvoSvnHd9VWXpQo88atf83izF/vEr37ND2+9nQUvmM+BB+zPujvuBOCW2zbw/HlzqixzWuvUrWw6qVXr4CDgJODRnfYH8MMiFdVQo9FgxVkf4lvfvJL+vj6uWHkNGzfeW3VZqsDPfv4oK879JwAaIw1OOfEYXn7UIp7z7Gdx/kVfYqTR4PdmzuQj7393xZVOX40ebMvFZL3CiLgM+Erzlg47v3ZlZr651RfsNXNO7/2tVblfbf1+1SWoB8044AVTv5f3Tt78/L9qO3Ou/O9rn/H3tWPSGW1mLp/ktZYhK0ndVqtVB5LUi3px1YFBK6lWPAVXkgqzdSBJhfXiqgODVlKt2DqQpMI8GCZJhdmjlaTCerF14PVoJdVKZra9tRIRl0fEtoi4a9y+50XEDRGxuflzv1bjGLSSaqVBtr214Qrg5J32nQOszcwFwNrm80kZtJJqZZRse2slM28Gfr7T7iXAyubjlcCprcYxaCXVylRaBxExEBHrx20DbXzFQZk53PyuYWBWqw94MExSrUzlYFhmDgKD5aoZ44xWUq104Q4Lj0TEbIDmz22tPmDQSqqVRmbb225aAyxrPl4GrG71AVsHkmqlk+toI+Iq4BjggIgYAj4CnA+siojlwEPA0lbjGLSSaqWTQZuZb3qal46fyjgGraRaaedEhG4zaCXVSi+egmvQSqoVLyojSYU1svculGjQSqoVe7SSVJg9WkkqzB6tJBU2autAkspyRitJhbnqQJIKs3UgSYXZOpCkwpzRSlJhzmglqbBGNqouYRcGraRa8RRcSSrMU3AlqTBntJJUmKsOJKkwVx1IUmGegitJhdmjlaTC7NFKUmHOaCWpMNfRSlJhzmglqTBXHUhSYR4Mk6TCerF10Fd1AZLUSTmFP61ExMkRcU9E/DQiztndmpzRSqqVTs1oI6IfuBQ4ARgC1kXEmszcONWxDFpJtdLBHu1i4KeZeT9ARFwNLAF6L2hHntwSpb9juoiIgcwcrLoO9RZ/LzprKpkTEQPAwLhdg+P+LeYAD497bQj4892pyR5tdw20fov2QP5eVCQzBzNz0bht/P/wJgrs3ZouG7SSNLEhYN6453OBrbszkEErSRNbByyIiEMjYibwRmDN7gzkwbDusg+nifh70YMycyQi3gV8B+gHLs/Mu3dnrOjFxb2SVCe2DiSpMINWkgozaLukU6fyqT4i4vKI2BYRd1Vdi8oyaLtg3Kl8rwKOAN4UEUdUW5V6wBXAyVUXofIM2u7YcSpfZj4JPHUqn/ZgmXkz8POq61B5Bm13THQq35yKapHUZQZtd3TsVD5J049B2x0dO5VP0vRj0HZHx07lkzT9GLRdkJkjwFOn8m0CVu3uqXyqj4i4CvgR8MKIGIqI5VXXpDI8BVeSCnNGK0mFGbSSVJhBK0mFGbSSVJhBK0mFGbSSVJhBK0mF/T/4t7z505gJSgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize Confusion Matrix\n", + "\n", + "sns.heatmap(cnf_matrix, annot=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:12.142927Z", + "iopub.status.busy": "2021-02-27T08:07:12.142198Z", + "iopub.status.idle": "2021-02-27T08:07:12.151815Z", + "shell.execute_reply": "2021-02-27T08:07:12.151317Z" + }, + "papermill": { + "duration": 0.028431, + "end_time": "2021-02-27T08:07:12.151953", + "exception": false, + "start_time": "2021-02-27T08:07:12.123522", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "#Check Model Accurancy\n", + "\n", + "print('Accuracy:' ,metrics.accuracy_score(y_test,y_pred))\n", + "print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-27T08:07:12.184425Z", + "iopub.status.busy": "2021-02-27T08:07:12.183706Z", + "iopub.status.idle": "2021-02-27T08:07:12.316789Z", + "shell.execute_reply": "2021-02-27T08:07:12.316142Z" + }, + "papermill": { + "duration": 0.152666, + "end_time": "2021-02-27T08:07:12.316943", + "exception": false, + "start_time": "2021-02-27T08:07:12.164277", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "y_pred_proba = logreg.predict_proba(x_test)[::,1]\n", + "fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)\n", + "auc = metrics.roc_auc_score(y_test, y_pred_proba)\n", + "plt.plot(fpr,tpr,label=\"data 1, auc=\"+str(auc))\n", + "plt.legend(loc=4)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### K-Nearest Neighbour Algorithm\n", + "K-Nearest Neighbour is one of the simplest Machine Learning algorithms based on the Supervised Learning technique. The K-NN algorithm assumes similarity between the new case/data and the available cases and puts the new case into the category that is most similar to the available categories. The K-NN algorithm stores all the available data and classifies a new data point based on similarity. This means that when new data appears, it can easily be classified into a well-suited category by using the K-NN algorithm."
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "KNeighborsClassifier(n_neighbors=6)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k=6\n", + "knn = KNeighborsClassifier(n_neighbors = k)\n", + "knn.fit(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = knn.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + " [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "print('Accuracy:' ,metrics.accuracy_score(y_test,y_pred))\n", + "print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + 
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAVoAAAD4CAYAAACt8i4nAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAQe0lEQVR4nO3dfZBddX3H8fd3l6QqLYJAICSpwSG10DqaTkhRtOVBHsSH0JHgUzW1ma7jIIZqFUSrg9URH0AB0XFHkEyVh9SWSUSrYISio4UESAUSIfJQ2GRLpgpTBZXs3W//2Etck2Xv3XB/99w9eb8yZ/bec+/93W/IzofffM/vnBOZiSSpnL6qC5CkujNoJakwg1aSCjNoJakwg1aSCtur+BfMnOOyBu3i1QcvrLoE9aDVD10Xz3SM7f97f9uZM+OAFzzj72uHM1pJKqz4jFaSumq0UXUFuzBoJdVLY6TqCnZh0EqqlczRqkvYhUErqV5GDVpJKssZrSQV5sEwSSrMGa0klZWuOpCkwjwYJkmF2TqQpMI8GCZJhTmjlaTCPBgmSYV5MEySysq0RytJZdmjlaTCbB1IUmHOaCWpsMb2qivYhUErqV5sHUhSYbYOJKkwZ7SSVJhBK0llpQfDJKkwe7SSVFgHWwcR8SDwC6ABjGTmooh4HnANMB94EDg9Mx+dbJy+jlUkSb0gR9vf2nNsZr4kMxc1n58DrM3MBcDa5vNJGbSS6mV0tP1t9ywBVjYfrwRObfUBg1ZSvUxhRhsRAxGxftw2sPNowPURcdu41w7KzGGA5s9ZrUqyRyupXkbav/B3Zg4Cg5O85ejM3BoRs4AbIuInu1OSM1pJ9dLBHm1mbm3+3AZcCywGHomI2QDNn9tajWPQSqqXDvVoI2LviPiDpx4DJwJ3AWuAZc23LQNWtyrJ1oGkeuncOtqDgGsjAsay8srM/HZErANWRcRy4CFgaauBDFpJ9dKhdbSZeT/w4gn2/ww4fipjGbSS6sUzwySpsCmsOugWg1ZSvWRWXcEuDFpJ9eJlEiWpMINWkgrzYJgkFdZoVF3BLgxaSfVi60CSCjNoJakwe7SSVFaOuo5WksqydSBJhbnqQJIKc0YrSYX1YNB6h4UuOenEY7j7rpv5ycYf8P73nVF1OarImZ9ewcrbv8rFN1y6Y9/fnPt2Lv3eF7noO5fwgcEPsvc+e1dYYQ1ktr91iUHbBX19fVx80cd5zWv/mhe9+Fje8IZTOfzwBVWXpQqs/Zfvct7bPvI7+zZ8fwNnnnAGK046ky0PbOH1Z7S8YL8mU/5241PWsnUQEX/M2H3M5zB2692twJrM3FS4ttpYfORC7rvvQR544CEAVq1azeteexKbNm2uuDJ128Zb72bW3N+9O/WG79+x4/G9t9/Dy045uttl1UsPLu+adEYbEWcDVwMB3Aqsaz6+KiLOKV9ePRwy52AeHtq64/nQlmEOOeTgCitSrzr+DSdw203rqy5jems02t+6pNWMdjnwJ5m5ffzOiLgQuBs4f6IPRcQAMAAQ/c+lr2/P7jk1b+72O7IHL06sai191+mMjjT4j2tvqrqUaS2n4cGwUeCQCfbPbr42ocwczMxFmbloTw9ZgC1Dw8yb+9v/jHPnzGZ4+JEKK1KvOfa041h0/GIuePdnqi5l+hvN9rcuaTWjPQtYGxGbgYeb+/4QOAx4V8nC6mTd+g0cdtihzJ8/jy1b/ofTT1/CW9/mygONWfiXf8br33ka5y49hyd//Zuqy5n+ptu1Dpr3MP8jYDFjB8MCGALWZWbvnX7RoxqNBivO+hDf+uaV9Pf1ccXKa9i48d6qy1IF3nvJ+/jTl76Iffbbh8tuuYKrLvwap52xlBkzZ3De1z4GwL133MMXz720xUh6Wj14MCxK9wr3mjmn9/7WqtyrD15YdQnqQasfum7XAxpT9PiH39h25uz90auf8fe1wzPDJNXLdGsdSN
K004OtA4NWUq304vIug1ZSvTijlaTCejBovaiMpHrp8Cm4EdEfEXdExHXN54dGxC0RsTkiromIma3GMGgl1UqOZttbm1YA4y+i9Ungs5m5AHiUsUsVTMqglVQvHTwFNyLmAq8Gvtx8HsBxwNebb1kJnNpqHINWUr1M4Xq0ETEQEevHbQM7jfY54P389tou+wOPZeZI8/kQY2fNTsqDYZLqZQoHwzJzEBic6LWIeA2wLTNvi4hjnto90TCtvseglVQvnVt1cDTwuog4BXgWsA9jM9x9I2Kv5qx2LmM3Q5iUrQNJtZKN0ba3ScfJ/EBmzs3M+cAbge9l5luAG4HTmm9bBqxuVZNBK6leyl+P9mzgPRHxU8Z6tpe1+oCtA0m1MoVlW+2PmXkTcFPz8f2MXTq2bQatpHrpwTPDDFpJ9dJ715QxaCXVS470XtIatJLqpfdy1qCVVC8lDoY9UwatpHpxRitJZTmjlaTSnNFKUlk7rqvVQwxaSbXSg3cbN2gl1YxBK0llOaOVpMIMWkkqLBsT3QShWgatpFpxRitJheWoM1pJKsoZrSQVlumMVpKKckYrSYWNuupAksryYJgkFWbQSlJh2XuXozVoJdWLM1pJKszlXZJUWMNVB5JUljNaSSqsF3u0fVUXIEmdlNn+NpmIeFZE3BoR/xURd0fEec39h0bELRGxOSKuiYiZrWoyaCXVSo5G21sLvwGOy8wXAy8BTo6Io4BPAp/NzAXAo8DyVgMZtJJqpTHa1/Y2mRzzy+bTGc0tgeOArzf3rwRObVWTQSupVqbSOoiIgYhYP24bGD9WRPRHxAZgG3ADcB/wWOaOm5oPAXNa1eTBMEm1MjqFVQeZOQgMTvJ6A3hJROwLXAscPtHbWn2PQSupVkos78rMxyLiJuAoYN+I2Ks5q50LbG31eVsHkmqlg6sODmzOZImIZwOvBDYBNwKnNd+2DFjdqiZntKrE12+/uOoSVFNTaR20MBtYGRH9jE1KV2XmdRGxEbg6Ij4G3AFc1mogg1ZSrbRaTdCuzPwxsHCC/fcDi6cylkErqVZ68CqJBq2keulg66BjDFpJteJFZSSpsB68Ca5BK6leEme0klTUiK0DSSrLGa0kFWaPVpIKc0YrSYU5o5WkwhrOaCWprB68N6NBK6leRp3RSlJZXlRGkgrzYJgkFTYatg4kqahG1QVMwKCVVCuuOpCkwlx1IEmFuepAkgqzdSBJhbm8S5IKazijlaSynNFKUmEGrSQV1oO3DDNoJdWLM1pJKsxTcCWpsF5cR9tXdQGS1EmjU9gmExHzIuLGiNgUEXdHxIrm/udFxA0Rsbn5c79WNRm0kmqlU0ELjADvzczDgaOAMyLiCOAcYG1mLgDWNp9PyqCVVCs5hW3ScTKHM/P25uNfAJuAOcASYGXzbSuBU1vVZNBKqpXRaH+LiIGIWD9uG5hozIiYDywEbgEOysxhGAtjYFarmjwYJqlWprLqIDMHgcHJ3hMRvw/8K3BWZv5f7MYdHAxaSbUy2sELJUbEDMZC9muZ+W/N3Y9ExOzMHI6I2cC2VuPYOpBUKx1cdRDAZcCmzLxw3EtrgGXNx8uA1a1qckYrqVY6eOHvo4G3AndGxIbmvnOB84FVEbEceAhY2mogg1ZSrXTqFNzM/AE87X1xjp/KWAatpFoZid67mY1BK6lWei9mDVpJNePVuySpsE4u7+oUg1ZSrfRezBq0kmrG1oEkFdbowTmtQSupVpzRSlJh6YxWkspyRrsHO+nEY7jwwo/S39fH5V+5ik99+tKqS1JFTnz9MvZ+znPo6+ujv7+fVZdfzE/uvY+PfvoSfvPkdvr7+/nHfziDFx3xwqpLnZZc3rWH6uvr4+KLPs7Jp7yJoaFh/vNH3+Ib113Ppk2bqy5NFbn8kvPZb9/n7nh+wRcu451/+xZe8dIjufmHt3LBFy7jis9/qsIKp6/ei1kvk9gVi49cyH33PcgDDzzE9u3bWbVqNa977UlVl6UeEh
H88vEnAPjl408w64D9K65o+hoh2966xRltFxwy52AeHtq64/nQlmEWH7mwwopUpYhg4O8/SESwdMmrWLrkFM5e8Q7e8Z4P8ZlLv0yOJl/90gVVlzlt1epgWES8PTO/8jSvDQADANH/XPr69t7dr6mFiW59kdl7vwzqjn/+4gXMOnB/fvboY/zdWedy6PPncf2NP+DsMwc44diX8+21N/PhT3yOL1/0iapLnZZ68WDYM2kdnPd0L2TmYGYuysxFe3rIAmwZGmbe3EN2PJ87ZzbDw49UWJGqNOvAsbbA/vvty/F/8TLu3HgPa/79u7zymKMBOOm4V3DnxnuqLHFayyn86ZZJgzYifvw0253AQV2qcdpbt34Dhx12KPPnz2PGjBmcfvoSvnHd9VWXpQo88atf83izF/vEr37ND2+9nQUvmM+BB+zPujvuBOCW2zbw/HlzqixzWuvUrWw6qVXr4CDgJODRnfYH8MMiFdVQo9FgxVkf4lvfvJL+vj6uWHkNGzfeW3VZqsDPfv4oK879JwAaIw1OOfEYXn7UIp7z7Gdx/kVfYqTR4PdmzuQj7393xZVOX40ebMvFZL3CiLgM+Erzlg47v3ZlZr651RfsNXNO7/2tVblfbf1+1SWoB8044AVTv5f3Tt78/L9qO3Ou/O9rn/H3tWPSGW1mLp/ktZYhK0ndVqtVB5LUi3px1YFBK6lWPAVXkgqzdSBJhfXiqgODVlKt2DqQpMI8GCZJhdmjlaTCerF14PVoJdVKZra9tRIRl0fEtoi4a9y+50XEDRGxuflzv1bjGLSSaqVBtr214Qrg5J32nQOszcwFwNrm80kZtJJqZZRse2slM28Gfr7T7iXAyubjlcCprcYxaCXVylRaBxExEBHrx20DbXzFQZk53PyuYWBWqw94MExSrUzlYFhmDgKD5aoZ44xWUq104Q4Lj0TEbIDmz22tPmDQSqqVRmbb225aAyxrPl4GrG71AVsHkmqlk+toI+Iq4BjggIgYAj4CnA+siojlwEPA0lbjGLSSaqWTQZuZb3qal46fyjgGraRaaedEhG4zaCXVSi+egmvQSqoVLyojSYU1svculGjQSqoVe7SSVJg9WkkqzB6tJBU2autAkspyRitJhbnqQJIKs3UgSYXZOpCkwpzRSlJhzmglqbBGNqouYRcGraRa8RRcSSrMU3AlqTBntJJUmKsOJKkwVx1IUmGegitJhdmjlaTC7NFKUmHOaCWpMNfRSlJhzmglqTBXHUhSYR4Mk6TCerF10Fd1AZLUSTmFP61ExMkRcU9E/DQiztndmpzRSqqVTs1oI6IfuBQ4ARgC1kXEmszcONWxDFpJtdLBHu1i4KeZeT9ARFwNLAF6L2hHntwSpb9juoiIgcwcrLoO9RZ/LzprKpkTEQPAwLhdg+P+LeYAD497bQj4892pyR5tdw20fov2QP5eVCQzBzNz0bht/P/wJgrs3ZouG7SSNLEhYN6453OBrbszkEErSRNbByyIiEMjYibwRmDN7gzkwbDusg+nifh70YMycyQi3gV8B+gHLs/Mu3dnrOjFxb2SVCe2DiSpMINWkgozaLukU6fyqT4i4vKI2BYRd1Vdi8oyaLtg3Kl8rwKOAN4UEUdUW5V6wBXAyVUXofIM2u7YcSpfZj4JPHUqn/ZgmXkz8POq61B5Bm13THQq35yKapHUZQZtd3TsVD5J049B2x0dO5VP0vRj0HZHx07lkzT9GLRdkJkjwFOn8m0CVu3uqXyqj4i4CvgR8MKIGIqI5VXXpDI8BVeSCnNGK0mFGbSSVJhBK0mFGbSSVJhBK0mFGbSSVJhBK0mF/T/4t7z505gJSgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(cnf_matrix, annot=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Decision Tree Classifier\n", + "Decision Tree is a Supervised learning technique that can be used for both classification and Regression problems, but mostly it is preferred for solving Classification problems. It is a tree-structured classifier, where internal nodes represent the features of a dataset, branches represent the decision rules and each leaf node represents the outcome." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DecisionTreeClassifier()" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "dtc=DecisionTreeClassifier()\n", + "dtc.fit(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = dtc.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + " [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "print('Accuracy:' ,metrics.accuracy_score(y_test,y_pred))\n", + "print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Random Forest Classifier\n", + "Random Forest is a popular machine learning algorithm that belongs to the supervised learning technique. It can be used for both Classification and Regression problems in ML. It is based on the concept of ensemble learning, which is a process of combining multiple classifiers to solve a complex problem and to improve the performance of the model." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RandomForestClassifier(n_estimators=225, random_state=1)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "rfc=RandomForestClassifier(n_estimators=225,random_state=1)\n", + "rfc.fit(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = rfc.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + 
" [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "print('Accuracy:' ,metrics.accuracy_score(y_test,y_pred))\n", + "print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Gaussian Naive Bayes Classifier Algorithm\n", + "Naive Bayes classifiers are a family of simple \"probabilistic classifiers\" based on applying Bayes' theorem with strong independence assumptions between the features. They are among the simplest Bayesian network models, but coupled with kernel density estimation, they can achieve higher accuracy levels."
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "GaussianNB()" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.naive_bayes import GaussianNB\n", + "nb=GaussianNB()\n", + "nb.fit(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = nb.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + " [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "print('Accuracy:' ,metrics.accuracy_score(y_test,y_pred))\n", + "print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### SVM Classification\n", + "Support Vector Machine or SVM is one of the most popular Supervised Learning algorithms, which is used for Classification as well as Regression problems. 
However, primarily, it is used for Classification problems in Machine Learning. The goal of the SVM algorithm is to create the best line or decision boundary that can segregate n-dimensional space into classes so that we can easily put the new data point in the correct category in the future. This best decision boundary is called a hyperplane." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SVC(random_state=1)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "svc=SVC(random_state=1)\n", + "svc.fit(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = svc.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + " [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "print('Accuracy:' ,metrics.accuracy_score(y_test,y_pred))\n", + 
"print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Gradient Boosting Algorithm\n", + "Gradient boosting is a machine learning technique for regression, classification and other tasks, which produces a prediction model in the form of an ensemble of weak prediction models, typically decision trees." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "gfc=GradientBoostingClassifier(n_estimators= 1000, max_leaf_nodes= 4, max_depth=None,random_state= 2,min_samples_split= 5)\n", + "gfc.fit(x_train,y_train)\n", + "y_head=gfc.predict(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = gfc.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + " [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "print('Accuracy:' 
,metrics.accuracy_score(y_test,y_pred))\n", + "print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### AdaBoost Algorithm\n", + "AdaBoost, short for Adaptive Boosting, is a statistical classification meta-algorithm formulated by Yoav Freund and Robert Schapire, who won the 2003 Gödel Prize for their work. It can be used in conjunction with many other types of learning algorithms to improve performance." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AdaBoostClassifier(n_estimators=100, random_state=0)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abc=AdaBoostClassifier(n_estimators=100, random_state=0)\n", + "abc.fit(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = abc.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + " [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnf_matrix = metrics.confusion_matrix(y_test, y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "print('Accuracy:' ,metrics.accuracy_score(y_test,y_pred))\n", + "print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Artificial Neural Networks Algorithm\n", + "Artificial neural networks, usually simply called neural networks, are computing systems vaguely inspired by the biological neural networks that constitute animal brains. An ANN is based on a collection of connected units or nodes called artificial neurons, which loosely model the neurons in a biological brain." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MLPClassifier(alpha=1, max_iter=1000)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ysa=MLPClassifier(alpha=1, max_iter=1000)\n", + "ysa.fit(x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1], dtype=int64)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = ysa.predict(x_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 12],\n", + " [ 0, 58]], dtype=int64)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cnf_matrix = metrics.confusion_matrix(y_test, 
y_pred)\n", + "cnf_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8285714285714286\n", + "Precision: 0.8285714285714286\n", + "Recall: 1.0\n" + ] + } + ], + "source": [ + "print('Accuracy:' ,metrics.accuracy_score(y_test,y_pred))\n", + "print(\"Precision:\",metrics.precision_score(y_test, y_pred))\n", + "print(\"Recall:\",metrics.recall_score(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "****************************\n", + "## Model Comparison\n", + "We have deployed nine machine learning algorithms, and every algorithm was deployed successfully without any issues. We have compared the models based on the accuracy score of each model. Now let's take a look at the scores of each model.\n", + "\n", + "|Name of the Model|Accuracy Score|\n", + "|:---:|:---:|\n", + "|Logistic Regression|82.86|\n", + "|Decision Tree Classifier|82.86|\n", + "|Random Forest Classifier|82.86|\n", + "|Naive Bayes Algorithm|82.86|\n", + "|KNN Algorithm|82.86|\n", + "|Support Vector Machine Algorithm|82.86|\n", + "|Gradient Boosting Algorithm|82.86|\n", + "|AdaBoosting Classifier|82.86|\n", + "|Artificial Neural Network|82.86|\n", + "\n", + "***************************************\n", + "## Conclusion\n", + "* For this project we have deployed nine different algorithms, and every algorithm provides the same accuracy score, which is 82.86%.\n", + "* To predict the outcome of kidney stone treatment using this dataset, we can use any of the above-mentioned algorithms or models and deploy the final model.\n", + "* Here the data limitation comes into play: it prevents the models from being more accurate.\n", + "* To make the models more accurate we need more attributes in the dataset, and the number of records must also be increased so that the models can be trained better.\n", + "* Apart from the data limitations created
by the dataset, the models are successfully deployed and predict the outcome accurately enough.\n", + "* In my opinion, anyone working with this dataset should go with simple logistic regression, as that would be enough!\n", + "\n", + "Hooray!! The models are deployed successfully!\n", + "\n", + "\n", + "### Hope this project will help you! Thank you!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "papermill": { + "default_parameters": {}, + "duration": 8.500556, + "end_time": "2021-02-27T08:07:12.940949", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2021-02-27T08:07:04.440393", + "version": "2.2.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Kidney Stone Prediction/Model/README.md b/Kidney Stone Prediction/Model/README.md new file mode 100644 index 000000000..ca1df3589 --- /dev/null +++ b/Kidney Stone Prediction/Model/README.md @@ -0,0 +1,71 @@ +# Kidney Stone Prediction +A kidney stone is a small, hard deposit that forms in the kidneys and is often painful when passed. + +Kidney stones are hard deposits of minerals and acid salts that stick together in concentrated urine. They can be painful when passing through the urinary tract, but usually don't cause permanent damage. + +The most common symptom is severe pain, usually in the side of the abdomen, that's often associated with nausea. +Treatment includes pain relievers and drinking lots of water to help pass the stone. Medical procedures may be required to remove or break up larger stones.
+ +Treatment includes pain relievers and drinking lots of water to help pass the stone. Medical procedures may be required to remove or break up larger stones. + + + +## Dataset +The dataset used here was collected from the Kaggle website. Here is the link to the dataset: https://www.kaggle.com/utkarshxy/kidney-stone-data. + +## Goal +The goal of this project is to create a prediction model which will predict the success of a kidney stone operation based on the stone's size and the type of treatment. +************************************************************ + +## What Have I Done? +1. Import all the required libraries. Check [`requirements.txt`](../requirements.txt). +2. Upload the dataset and the Jupyter Notebook file. +3. Data Cleaning +4. Data Visualization +5. Prediction Models + - KNN Algorithm + - Logistic Regression + - Random Forest Classifier + - Decision Tree Classifier + - Support Vector Machine Classifier + - AdaBoost Classifier + - Gradient Boosting Classifier + - Gaussian Naive Bayes Classifier + - MLP Classifier +6. Model comparison +7. Conclusion + +****************************************************************** +## Libraries used +1. NumPy +2. Pandas +3. Matplotlib +4. Scikit-learn +5. Seaborn + +********************************** + +## Model Comparison +We have deployed nine machine learning algorithms, and every algorithm was deployed successfully without any issues. We have compared the models based on the accuracy score of each model. Now let's take a look at the scores of each model.
+ +|Name of the Model|Accuracy Score (%)| +|:---:|:---:| +|Logistic Regression|82.86| +|Decision Tree Classifier|82.86| +|Random Forest Classifier|82.86| +|Naive Bayes Algorithm|82.86| +|KNN Algorithm|82.86| +|Support Vector Machine Algorithm|82.86| +|Gradient Boosting Algorithm|82.86| +|AdaBoost Classifier|82.86| +|Artificial Neural Network|82.86| + +*************************************** +## Conclusion +* For this project we have deployed nine different algorithms, and every algorithm provides the same accuracy score, which is 82.86%. +* To predict the success of kidney stone treatment using this dataset, we can use any of the above-mentioned algorithms or models and deploy the final model. +* Here the data limitation comes into play, and it prevents the models from being more accurate. +* To make the models more accurate we need more attributes in the dataset, and the number of records must also be increased so that the models can be trained better. +* Apart from the data limitations created by the dataset, the models are successfully deployed and predict the outcome accurately enough. +* In my opinion, anyone working with this dataset should go with simple logistic regression, as that would be enough! +****************************************** diff --git a/Kidney Stone Prediction/requirements.txt b/Kidney Stone Prediction/requirements.txt new file mode 100644 index 000000000..b332610db --- /dev/null +++ b/Kidney Stone Prediction/requirements.txt @@ -0,0 +1,7 @@ +# The required libraries for this project + +scikit-learn +seaborn +Numpy==1.19.2 +pandas==1.2.4 +matplotlib==3.4.2