notebook added for drum sound separation (refactored)

groupmm · Mar 20, 2024 · b232538 · b232538
1 parent 7144f27
commit b232538
Showing 1 changed file with 274 additions and 0 deletions.
diff --git a/demo_drum_sound_separation_nmf.ipynb b/demo_drum_sound_separation_nmf.ipynb
@@ -0,0 +1,274 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Date: Jun 2019 (*Review: March 2024*)\n",
+    "\n",
+    "#### The notebook proceeds in the following steps:\n",
+    "<br>1. It loads an example audio file containing a drum recording\n",
+    "<br>2. It computes the STFT of the audio data.\n",
+    "<br>3. It applies NMFD as described in [1], with audio-informed initialization of the components\n",
+    "<br>4. It visualizes the decomposition results.\n",
+    "<br>5. It resynthesizes the separated audio streams and saves them as wav files to the hard drive."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import numpy as np\n",
+    "import scipy.io.wavfile as wav\n",
+    "import IPython.display as ipd\n",
+    "\n",
+    "from libnmfd.core.nmfconv import init_activations, init_templates, nmfd\n",
+    "from libnmfd.dsp.filters import alpha_wiener_filter\n",
+    "from libnmfd.dsp.transforms import forward_stft, inverse_stft\n",
+    "from libnmfd.utils import make_monaural, pcm_int16_to_float32np\n",
+    "from libnmfd.utils.core_utils import visualize_components_nmf\n",
+    "\n",
+    "INPUT_DIR = 'data/'\n",
+    "OUT_DIR = 'output/'\n",
+    "\n",
+    "# create the output directory if it doesn't exist\n",
+    "if not os.path.isdir(OUT_DIR):\n",
+    "    os.makedirs(OUT_DIR)\n",
+    "\n",
+    "# convert wav from int16 to float32\n",
+    "filename = 'runningExample_AmenBreak.wav'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Load the audio signal"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fs, x = wav.read(os.path.join(INPUT_DIR, filename))\n",
+    "\n",
+    "# make monaural if necessary\n",
+    "x = make_monaural(x)\n",
+    "\n",
+    "x = pcm_int16_to_float32np(x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. compute STFT"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# spectral parameters\n",
+    "BLOCK_SIZE = 2048\n",
+    "HOP_SIZE = 512\n",
+    "\n",
+    "# STFT computation\n",
+    "X, A, P = forward_stft(x, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n",
+    "\n",
+    "# get dimensions and time and freq resolutions\n",
+    "num_bins, num_frames = X.shape\n",
+    "time_res = HOP_SIZE / fs\n",
+    "freq_res = fs / BLOCK_SIZE"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Apply NMF variants to STFT magnitude"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set common parameters\n",
+    "num_comp = 3\n",
+    "num_iter = 30\n",
+    "num_template_frames = 8\n",
+    "\n",
+    "# generate initial guess for templates\n",
+    "init_W = init_templates(num_comp=num_comp,\n",
+    "                        num_bins=num_bins,\n",
+    "                        strategy='drums')\n",
+    "\n",
+    "\n",
+    "\n",
+    "# generate initial activations\n",
+    "init_H = init_activations(num_comp=num_comp,\n",
+    "                          num_frames=num_frames,\n",
+    "                          strategy='uniform')\n",
+    "\n",
+    "# NMFD core method\n",
+    "nmfd_W, nmfd_H, nmfd_V, divKL, _ = nmfd(V=A, \n",
+    "                                        num_comp=num_comp, \n",
+    "                                        num_frames=num_frames, \n",
+    "                                        num_iter=num_iter,\n",
+    "                                        num_template_frames=num_template_frames,\n",
+    "                                        init_W=init_W,\n",
+    "                                        init_H=init_H)\n",
+    "\n",
+    "\n",
+    "# alpha-Wiener filtering\n",
+    "nmfd_A, _ = alpha_wiener_filter(A, nmfd_V, 1.0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#visualize\n",
+    "fh1, _ = visualize_components_nmf(V=A, W=nmfd_W, H=nmfd_H, comp_V=nmfd_A, time_res=time_res,\n",
+    "                                  freq_res=freq_res, end_sec=3.8, font_size=14)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "audios = []\n",
+    "\n",
+    "# resynthesize results of NMF with soft constraints and score information\n",
+    "for k in range(num_comp):\n",
+    "    Y = nmfd_A[k] * np.exp(1j * P);\n",
+    "    y, _ = inverse_stft(X=Y,\n",
+    "                        block_size=BLOCK_SIZE,\n",
+    "                        hop_size=HOP_SIZE,\n",
+    "                        reconst_mirror=True,\n",
+    "                        append_frames=True,\n",
+    "                        num_samp=len(x))\n",
+    "    audios.append(y)\n",
+    "    \n",
+    "    # save result\n",
+    "    out_filepath = os.path.join(OUT_DIR,\n",
+    "                                'Winstons_AmenBreak_NMFD_component_{}.wav'.format(k, filename))\n",
+    "    \n",
+    "    wav.write(filename=out_filepath, rate=fs, data=y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Breakbeat with 3 components"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ipd.Audio(x, rate=fs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Kick Drum"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ipd.Audio(audios[0].T, rate=fs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Snare Drum"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ipd.Audio(audios[1].T, rate=fs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Ride Cymbal"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ipd.Audio(audios[2].T, rate=fs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Reference: \n",
+    "[1] Christian Dittmar, Meinard Müller\n",
+    "<br>**Reverse Engineering the Amen Break — Score-Informed Separation and Restoration Applied to Drum Recordings**\n",
+    "<br>IEEE/ACM Transactions on Audio, Speech, and Language Processing, 24(9): 1531-1543, 2016.\n",
+    "\n",
+    "#### If you use the 'NMF toolbox' please refer to:\n",
+    "[2] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard Müller<br>\n",
+    "**NMF Toolbox: Music Processing Applications of Nonnegative Matrix Factorization**<br>\n",
+    "In Proceedings of the  International Conference on Digital Audio Effects (DAFx), 2019."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}