diff --git a/demo_edm_decomposition_four_comp.ipynb b/demo_edm_decomposition_four_comp.ipynb new file mode 100755 index 0000000..fae37f1 --- /dev/null +++ b/demo_edm_decomposition_four_comp.ipynb @@ -0,0 +1,226 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Date: Jun 2019 (*Review: March 2024*)
Programmer: Patricio López-Serrano, Yiğitcan Özer\n",
    "This demo illustrates the decomposition of a loop-based electronic dance music track, following [1].\n",
    "\n",
    "#### The notebook proceeds in the following steps:\n",
    "
1. Load audio files for the complete, downmixed track, as well as for the individual loops that the track contains.\n", + "
2. Compute STFTs for all audio data.\n", + "
3. Each loop becomes a fixed template (\"page\") in the tensor W.\n", + "
The track spectrogram is the target to approximate, V.\n", + "
We wish to learn the activation matrix H, which answers the question\n", + "
\"Where was each loop activated throughout the track?\"\n", + "
4. Visualize results." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import scipy.io.wavfile as wav\n", + "import IPython.display as ipd\n", + "\n", + "from libnmfd.core.nmfconv import init_activations, nmfd\n", + "from libnmfd.dsp.filters import alpha_wiener_filter\n", + "from libnmfd.dsp.transforms import forward_stft, inverse_stft, log_freq_log_mag\n", + "from libnmfd.utils import make_monaural, pcm_int16_to_float32np\n", + "from libnmfd.utils.core_utils import visualize_components_nmf\n", + "\n", + "\n", + "INPUT_DIR = 'data/'\n", + "OUT_DIR = 'output/'\n", + "\n", + "filename = 'LSDDM_EM_track.wav'\n", + "filename_fx = 'LSDDM_EM_Effects.wav'\n", + "filename_bass = 'LSDDM_EM_bass.wav'\n", + "filename_melody = 'LSDDM_EM_melody.wav'\n", + "filename_drums = 'LSDDM_EM_drums.wav'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Load the audio signal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fs, x_tr = wav.read(os.path.join(INPUT_DIR, filename))\n", + "_ , x_fx = wav.read(os.path.join(INPUT_DIR, filename_fx))\n", + "_ , x_bass = wav.read(os.path.join(INPUT_DIR, filename_bass))\n", + "_ , x_melody = wav.read(os.path.join(INPUT_DIR, filename_melody))\n", + "_ , x_drums = wav.read(os.path.join(INPUT_DIR, filename_drums))\n", + "\n", + "# make monaural if necessary\n", + "x_tr = make_monaural(x_tr)\n", + "x_fx = make_monaural(x_fx)\n", + "x_bass = make_monaural(x_bass)\n", + "x_melody = make_monaural(x_melody)\n", + "x_drums = make_monaural(x_drums)\n", + "\n", + "# int16 -> float32 conversion\n", + "x_tr = pcm_int16_to_float32np(x_tr)\n", + "x_fx = pcm_int16_to_float32np(x_fx)\n", + "x_bass = pcm_int16_to_float32np(x_bass)\n", + "x_melody = pcm_int16_to_float32np(x_melody)\n", + "x_drums = pcm_int16_to_float32np(x_drums)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 
Compute STFT" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# spectral parameters\n",
    "BLOCK_SIZE = 2048\n",
    "HOP_SIZE = 512\n",
    "\n",
    "# STFT computation\n",
    "X_tr, A_tr, P_tr = forward_stft(x_tr, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n",
    "\n",
    "# get dimensions and time and freq resolutions\n",
    "num_bins, num_frames = X_tr.shape\n",
    "time_res = HOP_SIZE / fs\n",
    "freq_res = fs / BLOCK_SIZE\n",
    "\n",
    "# get logarithmically-spaced frequency axis version for visualization\n",
    "log_freq_log_mag_A, log_freq_axis = log_freq_log_mag(A_tr, freq_res=freq_res)\n",
    "num_log_bins = len(log_freq_axis)\n",
    "\n",
    "# repeat for the individual loops --------------------------------------\n",
    "X_bass, A_bass, _ = forward_stft(x_bass, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n",
    "X_fx, A_fx, _ = forward_stft(x_fx, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n",
    "X_melody, A_melody, _ = forward_stft(x_melody, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n",
    "X_drums, A_drums, _ = forward_stft(x_drums, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n",
    "num_bins_bass, num_frames_bass = X_bass.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [
    "### 3. Apply NMFD to the STFT magnitude" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# set common parameters\n",
    "num_comp = 4\n",
    "num_iter = 30\n",
    "num_template_frames = num_frames_bass\n",
    "\n",
    "init_W = list()\n",
    "init_W.append(A_drums)\n",
    "init_W.append(A_melody)\n",
    "init_W.append(A_bass)\n",
    "init_W.append(A_fx)\n",
    "\n",
    "# generate initial activations\n",
    "init_H = init_activations(num_comp=num_comp,\n",
    "                          num_frames=num_frames,\n",
    "                          strategy='uniform')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# NMFD core method\n",
    "nmfd_W, nmfd_H, nmfd_V, _, _ = nmfd(V=A_tr,\n",
    "                                    num_comp=num_comp,\n",
    "                                    num_frames=num_frames,\n",
    "                                    num_bins=num_bins,\n",
    "                                    num_iter=num_iter,\n",
    "                                    num_template_frames=num_template_frames,\n",
    "                                    init_W=init_W,\n",
    "                                    init_H=init_H,\n",
    "                                    fix_W=True)\n",
    "# alpha-Wiener filtering\n",
    "nmfd_A, _ = alpha_wiener_filter(A_tr, nmfd_V, 1.0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# visualize\n",
    "fh1, _ = visualize_components_nmf(V=A_tr, W=nmfd_W, H=nmfd_H, comp_V=nmfd_V, time_res=time_res,\n",
    "                                  freq_res=freq_res, log_comp=1e5, font_size=14)\n",
    "\n",
    "# create the output directory if it does not exist yet\n",
    "os.makedirs(OUT_DIR, exist_ok=True)\n",
    "fh1.savefig(os.path.join(OUT_DIR, 'LSDDM_EM.png'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [
    "#### References \n",
    "[1] Patricio López-Serrano, Christian Dittmar, Jonathan Driedger, and Meinard Müller.\n",
    "
**Towards Modeling and Decomposing Loop-based Electronic Music**\n", + "
In Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), pages 502–508, New York City, USA, August 2016.\n",
    "\n",
    "#### If you use the 'NMF toolbox', please refer to:\n",
    "[2] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard Müller.
\n", + "**NMF Toolbox: Music Processing Applications of Nonnegative Matrix Factorization**
\n", + "In Proceedings of the International Conference on Digital Audio Effects (DAFx), 2019." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}