diff --git a/demo_audio_mosaicing_continuity.ipynb b/demo_audio_mosaicing_continuity.ipynb new file mode 100644 index 0000000..fceb71f --- /dev/null +++ b/demo_audio_mosaicing_continuity.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Date: Jun 2019 *(review in March 2024)*
Programmer: Christian Dittmar, Yiğitcan Özer\n", + "This is the demo script which illustrates the main functionalities of the 'NMF toolbox'. For a detailed description we refer to [1,2] (see References below).\n", + "\n", + "#### The script proceeds in the following steps:\n", + "
1. It loads an target audio file containing the intro of the song \"Let it be\", by \"The Beatles\".\n", + "
2. It loads a source audio file containing the sound of buzzing bees in different pitches.\n", + "
3. It computes the STFT of all audio data.\n", + "
4. It applies the diagonal NMF as described in [1], in order to approximate the target with the timbral content of the source.\n", + "
5. It visualizes the NMF results.\n", + "
6. It resynthesizes the audio mosaic." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import scipy.io.wavfile as wav\n", + "import IPython.display as ipd\n", + "\n", + "from libnmfd.core import nmf_diag\n", + "from libnmfd.dsp.algorithms import griffin_lim\n", + "from libnmfd.dsp.transforms import forward_stft\n", + "from libnmfd.utils import EPS, MAX_WAV_VALUE, make_monaural, pcm_int16_to_float32np\n", + "from libnmfd.utils.core_utils import visualize_components_nmf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "INPUT_DIR = 'data'\n", + "OUT_DIR = 'output'\n", + "\n", + "# create the output directory if it doesn't exist\n", + "if not os.path.isdir(OUT_DIR):\n", + " os.makedirs(OUT_DIR)\n", + "\n", + "filename_source = 'Bees_Buzzing.wav'\n", + "filename_target = 'Beatles_LetItBe.wav'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Load the source and target signal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# read signals\n", + "fs, xs = wav.read(os.path.join(INPUT_DIR, filename_source))\n", + "fs, xt = wav.read(os.path.join(INPUT_DIR, filename_target))\n", + "\n", + "# make monaural if necessary\n", + "make_monaural(xs)\n", + "make_monaural(xt)\n", + "\n", + "# convert wavs from int16 to float32\n", + "xs = pcm_int16_to_float32np(xs)\n", + "xt = pcm_int16_to_float32np(xt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Compute STFT of both signals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "BLOCK_SIZE = 2048\n", + "HOP_SIZE = 1024\n", + "\n", + "# STFT computation\n", + "Xs, As, Ps = forward_stft(xs, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n", + "Xt, At, Pt = forward_stft(xt, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n", + "\n", + "# get dimensions and time and freq resolutions\n", + "_, num_target_frames = Xt.shape\n", + "num_bins, num_source_frames = Xs.shape\n", + "time_res = HOP_SIZE / fs\n", + "freq_res = fs / BLOCK_SIZE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Apply continuity NMF variants to mosaicing pair" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize activations randomly\n", + "H0 = np.random.rand(num_source_frames, num_target_frames)\n", + "\n", + "# init templates by source frames\n", + "W0 = As * 1./ (EPS + np.sum(As, axis=0))\n", + "Xs = Xs * 1./ (EPS + np.sum(As, axis=0))\n", + "\n", + "# parameters taken from Jonathan Driedger's toolbox\n", + "paramNMFdiag = dict()\n", + "paramNMFdiag['fixW'] = True\n", + "paramNMFdiag['numOfIter'] = 20\n", + "paramNMFdiag['continuity'] = dict()\n", + "paramNMFdiag['continuity']['polyphony'] = 10\n", + "paramNMFdiag['continuity']['length'] = 7\n", + "paramNMFdiag['continuity']['grid'] = 5\n", + "paramNMFdiag['continuity']['sparsen'] = [1, 7]\n", + "\n", + "# call the reference implementation as provided by Jonathan Driedger\n", + "nmfdiagW, nmfdiagH = nmf_diag(V=At, \n", + " num_iter=30,\n", + " init_W=W0, \n", + " init_H=H0, \n", + " fix_W=True,\n", + " cont_polyphony=10,\n", + " cont_length=7,\n", + " cont_grid=5,\n", + " cont_sparsen= (1, 7))\n", + "\n", + "# create mosaic, replacing the magnitude frames by complex valued frames\n", + "contY = Xs @ nmfdiagH" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# visualize\n", + "fh1, _ = visualize_components_nmf(V=At, W=nmfdiagW, H=nmfdiagH, comp_V=None, freq_res=freq_res, time_res=time_res,\n", + " font_size=14)\n", + "fh1.savefig(os.path.join(OUT_DIR, 'LetItBee_NMFdiag.png'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# resynthesize using Griffin-Lim, 50 iterations by default\n", + "_, _, res = griffin_lim(X=contY, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, )\n", + "\n", + "# save result\n", + "wav.write(filename=os.path.join(OUT_DIR, 'LetItBee_NMFdiag_with_target_' + filename_target),\n", + " rate=fs,\n", + " data=res*MAX_WAV_VALUE)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Let it Be" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# play\n", + "ipd.Audio(xt, rate=fs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Buzzing Bees" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# play\n", + "ipd.Audio(xs, rate=fs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Let it Bee!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# play\n", + "ipd.Audio(res.T, rate=fs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### References\n", + "[1] Jonathan Driedger, Thomas Prätzlich, and Meinard Müller
\n", + "**Let It Bee — Towards NMF-Inspired Audio Mosaicing**
\n", + "In Proceedings of the International Conference on Music Information Retrieval (ISMIR): 350–356, 2015.\n", + "\n", + "#### If you use the 'NMF toolbox' please refer to \n", + "[2] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard Müller
\n", + "**NMF Toolbox: Music Processing Applications of Nonnegative Matrix Factorization**
\n", + "In Proceedings of the International Conference on Digital Audio Effects (DAFx), 2019." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}