diff --git a/demo_drum_sound_separation_nmf.ipynb b/demo_drum_sound_separation_nmf.ipynb new file mode 100755 index 0000000..0ec4ac0 --- /dev/null +++ b/demo_drum_sound_separation_nmf.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Date: Jun 2019 (*Review: March 2024*)\n", + "\n", + "#### The notebook proceeds in the following steps:\n", + "
1. It loads an example audio file containing a drum recording.\n", + "
2. It computes the STFT of the audio data.\n", + "
3. It applies NMFD as described in [1], with audio-informed initialization of the components.\n", + "
4. It visualizes the decomposition results.\n", + "
5. It resynthesizes the separated audio streams and saves them as wav files to the hard drive." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import scipy.io.wavfile as wav\n", + "import IPython.display as ipd\n", + "\n", + "from libnmfd.core.nmfconv import init_activations, init_templates, nmfd\n", + "from libnmfd.dsp.filters import alpha_wiener_filter\n", + "from libnmfd.dsp.transforms import forward_stft, inverse_stft\n", + "from libnmfd.utils import make_monaural, pcm_int16_to_float32np\n", + "from libnmfd.utils.core_utils import visualize_components_nmf\n", + "\n", + "# input and output directories\n", + "INPUT_DIR = 'data/'\n", + "OUT_DIR = 'output/'\n", + "\n", + "# create the output directory if it doesn't exist\n", + "# (exist_ok avoids the check-then-create race of a separate isdir test)\n", + "os.makedirs(OUT_DIR, exist_ok=True)\n", + "\n", + "# name of the example recording, looked up inside INPUT_DIR\n", + "filename = 'runningExample_AmenBreak.wav'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Load the audio signal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fs, x = wav.read(os.path.join(INPUT_DIR, filename))\n", + "\n", + "# make monaural if necessary\n", + "x = make_monaural(x)\n", + "\n", + "# convert wav from int16 to float32\n", + "x = pcm_int16_to_float32np(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 
Compute STFT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# STFT parameters: block size and hop size\n", + "BLOCK_SIZE = 2048\n", + "HOP_SIZE = 512\n", + "\n", + "# forward STFT; A serves as the magnitude and P as the phase in the cells below\n", + "X, A, P = forward_stft(\n", + "    x,\n", + "    block_size=BLOCK_SIZE,\n", + "    hop_size=HOP_SIZE,\n", + "    reconst_mirror=True,\n", + "    append_frames=True,\n", + ")\n", + "\n", + "# spectrogram dimensions plus time and frequency resolution\n", + "num_bins, num_frames = X.shape\n", + "time_res = HOP_SIZE / fs\n", + "freq_res = fs / BLOCK_SIZE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Apply NMFD to STFT magnitude" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parameters shared by the initialization helpers and the NMFD call\n", + "num_comp = 3\n", + "num_iter = 30\n", + "num_template_frames = 8\n", + "\n", + "# initial guess for the templates ('drums' strategy)\n", + "init_W = init_templates(num_comp=num_comp, num_bins=num_bins, strategy='drums')\n", + "\n", + "# initial activations ('uniform' strategy)\n", + "init_H = init_activations(num_comp=num_comp, num_frames=num_frames, strategy='uniform')\n", + "\n", + "# NMFD core method, started from the initial templates and activations\n", + "nmfd_W, nmfd_H, nmfd_V, divKL, _ = nmfd(\n", + "    V=A,\n", + "    num_comp=num_comp,\n", + "    num_frames=num_frames,\n", + "    num_iter=num_iter,\n", + "    num_template_frames=num_template_frames,\n", + "    init_W=init_W,\n", + "    init_H=init_H,\n", + ")\n", + "\n", + "# alpha-Wiener filtering of the mixture magnitude with the NMFD component estimates\n", + "nmfd_A, _ = alpha_wiener_filter(A, nmfd_V, 1.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# visualize the decomposition results\n", + "fh1, _ = visualize_components_nmf(\n", + "    V=A, W=nmfd_W, H=nmfd_H, comp_V=nmfd_A,\n", + "    time_res=time_res, freq_res=freq_res,\n", + "    end_sec=3.8, font_size=14,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "audios = []\n", + "\n", + "# resynthesize results of 
NMFD, one audio signal per component\n", + "for k in range(num_comp):\n", + "    # combine the k-th filtered magnitude with the phase P of the input STFT\n", + "    Y = nmfd_A[k] * np.exp(1j * P)\n", + "    y, _ = inverse_stft(X=Y,\n", + "                        block_size=BLOCK_SIZE,\n", + "                        hop_size=HOP_SIZE,\n", + "                        reconst_mirror=True,\n", + "                        append_frames=True,\n", + "                        num_samp=len(x))\n", + "    audios.append(y)\n", + "\n", + "    # save result; the component index k is the only placeholder in the file name\n", + "    out_filepath = os.path.join(OUT_DIR,\n", + "                                'Winstons_AmenBreak_NMFD_component_{}.wav'.format(k))\n", + "\n", + "    wav.write(filename=out_filepath, rate=fs, data=y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Breakbeat with 3 components" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ipd.Audio(x, rate=fs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Kick Drum" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ipd.Audio(audios[0].T, rate=fs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Snare Drum" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ipd.Audio(audios[1].T, rate=fs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Ride Cymbal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ipd.Audio(audios[2].T, rate=fs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Reference: \n", + "[1] Christian Dittmar, Meinard Müller\n", + "
**Reverse Engineering the Amen Break — Score-Informed Separation and Restoration Applied to Drum Recordings**\n", + "
IEEE/ACM Transactions on Audio, Speech, and Language Processing, 24(9): 1531-1543, 2016.\n", + "\n", + "#### If you use the 'NMF toolbox' please refer to:\n", + "[2] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard Müller
\n", + "**NMF Toolbox: Music Processing Applications of Nonnegative Matrix Factorization**
\n", + "In Proceedings of the International Conference on Digital Audio Effects (DAFx), 2019." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}