diff --git a/SoundProject.ipynb b/SoundProject.ipynb new file mode 100644 index 0000000..0bdc09b --- /dev/null +++ b/SoundProject.ipynb @@ -0,0 +1,796 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import librosa\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.multiclass import OneVsRestClassifier\n", + "import matplotlib.pyplot as plt\n", + "import soundfile as sf\n", + "from sklearn.metrics import classification_report\n", + "from keras.models import Sequential\n", + "from keras.utils import to_categorical\n", + "from keras.layers import Dense, Conv2D,Flatten,MaxPooling2D,Dropout\n", + "from sklearn.svm import NuSVC\n", + "from sklearn.svm import SVC" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "y = []  # placeholder; rebound to the audio samples by sf.read below\n", + "X = []\n", + "yp = []\n", + "new_X = []\n", + "new_X_mel = []\n", + "yp_mel = []" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "path = 'C:/Users/clyde/Documents/Thinkful/Pre Data Science Bootcamp/Sound Classification Datbase/Train/'  # directory the <ID>.wav clips are read from\n", + "df = pd.read_csv('C:/Users/clyde/Documents/Thinkful/Pre Data Science Bootcamp/Sound Classification Datbase/train.csv')\n", + "class_label = df.Class.unique().tolist()  # distinct values of the Class column" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "i=1" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "y, sr = sf.read(path + '%d.wav' % i)  # build the file name from path instead of repeating the absolute directory" + ] + 
}, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "zz = librosa.feature.melspectrogram(y=y, sr=sr, S=None, n_fft=2048, hop_length=1510)  # mel spectrogram of the loaded clip" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "S = librosa.feature.inverse.mel_to_stft(zz, sr=sr, n_fft=2048)  # invert with the same sr/n_fft that produced zz\n", + "y_griffinlim = librosa.griffinlim(S, hop_length=1510)  # same hop as the analysis; own name so the loaded y is not overwritten" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import IPython.display as ipd\n", + "ipd.Audio(y_griffinlim, rate=sr)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([15809.936 , 15787.592 , 15768.905 , ..., 1401.3564, 1418.318 ,\n", + " 1432.3749], dtype=float32)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "librosa.feature.inverse.mfcc_to_audio(librosa.feature.mfcc(S=librosa.power_to_db(zz)), n_mels=128, dct_type=2, norm='ortho', ref=1.0, sr=sr, n_fft=2048, hop_length=1510)  # feed MFCCs derived from zz, not the mel spectrogram itself" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "q = librosa.istft(S, hop_length=1510)  # hop must match the analysis hop used for zz" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ipd.Audio(q, rate=sr)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "zzz = librosa.feature.inverse.mel_to_audio(zz, sr=sr, n_fft=2048, hop_length=1510)  # one-step mel -> audio with matching parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + 
"execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ipd.Audio(zzz, rate=sr)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y, sr = sf.read(path + '%d.wav' % 3)  # clip 3, located via path rather than a repeated absolute directory\n", + "ipd.Audio(y[:,1], rate=sr)  # play column 1 of the two-dimensional sample array" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-0.00198364, -0.00192261, -0.00210571, ..., 0.01861572,\n", + " 0.15274048, 0.12020874])" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y[:,1]" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import IPython.display as ipd\n", + "n = min(len(y), 89999)  # number of samples to reconstruct, capped at 89999\n", + "hl = 353\n", + "n_fft = 511  # odd frame length: 1 + 511 // 2 = 256 frequency bins\n", + "y_pad = librosa.util.fix_length(y[:,1], size=n + n_fft // 2)  # size passed by keyword; newer librosa rejects a positional size\n", + "D = librosa.stft(y_pad, n_fft=n_fft, hop_length=hl)\n", + "y_out = librosa.istft(D, length=n, hop_length=hl)  # length=n trims the reconstruction back to n samples\n", + "ipd.Audio(y_out, rate=sr)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((256, 256), 89999)" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "D.shape,n" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.03317261, 0.08050537, 0.0947876 , ..., -0.01226807,\n", + " 
-0.01998901, -0.03671265])" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pad" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\clyde\\Anaconda3\\envs\\dask-scipy\\lib\\site-packages\\librosa\\display.py:709: UserWarning: Trying to display complex-valued input. Showing magnitude instead.\n", + " warnings.warn('Trying to display complex-valued input. '\n" + ] + }, + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Linear-frequency power spectrogram')" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from librosa import display\n", + "DD = librosa.amplitude_to_db(np.abs(D), ref=np.max)  # STFT magnitude in dB relative to its peak\n", + "plt.subplot(1, 1, 1)\n", + "librosa.display.specshow(DD, y_axis='linear')  # plot the dB matrix; the complex D triggered a magnitude-fallback warning\n", + "plt.colorbar(format='%+2.0f dB')\n", + "plt.title('Linear-frequency power spectrogram')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "from librosa import display\n" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Linear-frequency power spectrogram')" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + ">>> DD = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)\n", + ">>> plt.subplot(4, 2, 1)\n", + ">>> librosa.display.specshow(DD, y_axis='linear')\n", + ">>> plt.colorbar(format='%+2.0f dB')\n", + ">>> plt.title('Linear-frequency power spectrogram')" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_features(window_size,n,hl,n_fft):\n", + "\n", + " for i in df.ID[0:20]:\n", + " try:\n", + " [y,sr] = sf.read('C:/Users/clyde/Documents/Thinkful/Pre Data Science Bootcamp/Sound Classification Datbase/Train/%d.wav'%i)\n", + " if y.ndim ==2 and len(y) < window_size:\n", + " continue\n", + " \n", + " elif y.ndim == 2 and len(y)>=window_size:\n", + " y= y[:,1].T[0:window_size] \n", + " y_pad = librosa.util.fix_length(y, n + n_fft // 2)\n", + " print(len(y))\n", + " \n", + " elif y.ndim ==1 and len(y)