-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
notebook added for drum sound separation (refactored)
- Loading branch information
Showing
1 changed file
with
274 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,274 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Date: Jun 2019 (*Review: March 2024*)\n", | ||
"\n", | ||
"#### The notebook proceeds in the following steps:\n", | ||
"<br>1. It loads an example audio file containing a drum recording\n", | ||
"<br>2. It computes the STFT of the audio data.\n", | ||
"<br>3. It applies NMFD as described in [1], with audio-informed initialization of the components\n", | ||
"<br>4. It visualizes the decomposition results.\n", | ||
"<br>5. It resynthesizes the separated audio streams and saves them as wav files to the hard drive." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import numpy as np\n", | ||
"import scipy.io.wavfile as wav\n", | ||
"import IPython.display as ipd\n", | ||
"\n", | ||
"from libnmfd.core.nmfconv import init_activations, init_templates, nmfd\n", | ||
"from libnmfd.dsp.filters import alpha_wiener_filter\n", | ||
"from libnmfd.dsp.transforms import forward_stft, inverse_stft\n", | ||
"from libnmfd.utils import make_monaural, pcm_int16_to_float32np\n", | ||
"from libnmfd.utils.core_utils import visualize_components_nmf\n", | ||
"\n", | ||
"INPUT_DIR = 'data/'\n", | ||
"OUT_DIR = 'output/'\n", | ||
"\n", | ||
"# create the output directory if it doesn't exist\n", | ||
"if not os.path.isdir(OUT_DIR):\n", | ||
" os.makedirs(OUT_DIR)\n", | ||
"\n", | ||
"# convert wav from int16 to float32\n", | ||
"filename = 'runningExample_AmenBreak.wav'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### 1. Load the audio signal" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"fs, x = wav.read(os.path.join(INPUT_DIR, filename))\n", | ||
"\n", | ||
"# make monaural if necessary\n", | ||
"x = make_monaural(x)\n", | ||
"\n", | ||
"x = pcm_int16_to_float32np(x)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### 2. compute STFT" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# spectral parameters\n", | ||
"BLOCK_SIZE = 2048\n", | ||
"HOP_SIZE = 512\n", | ||
"\n", | ||
"# STFT computation\n", | ||
"X, A, P = forward_stft(x, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n", | ||
"\n", | ||
"# get dimensions and time and freq resolutions\n", | ||
"num_bins, num_frames = X.shape\n", | ||
"time_res = HOP_SIZE / fs\n", | ||
"freq_res = fs / BLOCK_SIZE" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### 3. Apply NMF variants to STFT magnitude" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# set common parameters\n", | ||
"num_comp = 3\n", | ||
"num_iter = 30\n", | ||
"num_template_frames = 8\n", | ||
"\n", | ||
"# generate initial guess for templates\n", | ||
"init_W = init_templates(num_comp=num_comp,\n", | ||
" num_bins=num_bins,\n", | ||
" strategy='drums')\n", | ||
"\n", | ||
"\n", | ||
"\n", | ||
"# generate initial activations\n", | ||
"init_H = init_activations(num_comp=num_comp,\n", | ||
" num_frames=num_frames,\n", | ||
" strategy='uniform')\n", | ||
"\n", | ||
"# NMFD core method\n", | ||
"nmfd_W, nmfd_H, nmfd_V, divKL, _ = nmfd(V=A, \n", | ||
" num_comp=num_comp, \n", | ||
" num_frames=num_frames, \n", | ||
" num_iter=num_iter,\n", | ||
" num_template_frames=num_template_frames,\n", | ||
" init_W=init_W,\n", | ||
" init_H=init_H)\n", | ||
"\n", | ||
"\n", | ||
"# alpha-Wiener filtering\n", | ||
"nmfd_A, _ = alpha_wiener_filter(A, nmfd_V, 1.0)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#visualize\n", | ||
"fh1, _ = visualize_components_nmf(V=A, W=nmfd_W, H=nmfd_H, comp_V=nmfd_A, time_res=time_res,\n", | ||
" freq_res=freq_res, end_sec=3.8, font_size=14)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"audios = []\n", | ||
"\n", | ||
"# resynthesize results of NMF with soft constraints and score information\n", | ||
"for k in range(num_comp):\n", | ||
" Y = nmfd_A[k] * np.exp(1j * P);\n", | ||
" y, _ = inverse_stft(X=Y,\n", | ||
" block_size=BLOCK_SIZE,\n", | ||
" hop_size=HOP_SIZE,\n", | ||
" reconst_mirror=True,\n", | ||
" append_frames=True,\n", | ||
" num_samp=len(x))\n", | ||
" audios.append(y)\n", | ||
" \n", | ||
" # save result\n", | ||
" out_filepath = os.path.join(OUT_DIR,\n", | ||
" 'Winstons_AmenBreak_NMFD_component_{}.wav'.format(k, filename))\n", | ||
" \n", | ||
" wav.write(filename=out_filepath, rate=fs, data=y)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Breakbeat with 3 components" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"ipd.Audio(x, rate=fs)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Kick Drum" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"ipd.Audio(audios[0].T, rate=fs)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Snare Drum" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"ipd.Audio(audios[1].T, rate=fs)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Ride Cymbal" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"ipd.Audio(audios[2].T, rate=fs)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Reference: \n", | ||
"[1] Christian Dittmar, Meinard Müller\n", | ||
"<br>**Reverse Engineering the Amen Break — Score-Informed Separation and Restoration Applied to Drum Recordings**\n", | ||
"<br>IEEE/ACM Transactions on Audio, Speech, and Language Processing, 24(9): 1531-1543, 2016.\n", | ||
"\n", | ||
"#### If you use the 'NMF toolbox' please refer to:\n", | ||
"[2] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard Müller<br>\n", | ||
"**NMF Toolbox: Music Processing Applications of Nonnegative Matrix Factorization**<br>\n", | ||
"In Proceedings of the International Conference on Digital Audio Effects (DAFx), 2019." | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.11" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |