Skip to content

Commit

Permalink
Added method for initializing nmf drum templates based on provided dr…
Browse files Browse the repository at this point in the history
…um samples, some more beautifications
  • Loading branch information
Christian Dittmar committed Aug 9, 2024
1 parent 810076c commit 9b2502f
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 109 deletions.
29 changes: 14 additions & 15 deletions demo_drum_extraction_kam_nmf_score_informed.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@
"source": [
"import os\n",
"import numpy as np\n",
"import scipy.io.wavfile as wav\n",
"import soundfile as sf\n",
"import IPython.display as ipd\n",
"\n",
"from libnmfd.core.nmfconv import conv_model, drum_specific_soft_constraints_nmf, \\\n",
"from libnmfd.core.nmfconv import conv_model, \\\n",
" init_activations, init_templates, nmfd\n",
"from libnmfd.dsp.algorithms import hpss_kam_fitzgerald\n",
"from libnmfd.dsp.filters import alpha_wiener_filter\n",
"from libnmfd.dsp.transforms import forward_stft, inverse_stft, log_freq_log_mag\n",
"from libnmfd.utils import make_monaural, pcm_int16_to_float32np\n",
"from libnmfd.utils.core_utils import percussiveness_estimation, visualize_components_kam, visualize_components_nmf\n",
"\n",
"from libnmfd.utils.core_utils import drum_specific_soft_constraints_nmf\n",
"\n",
"INPUT_DIR = 'data/'\n",
"OUT_DIR = 'output/'\n",
Expand All @@ -69,14 +69,12 @@
"outputs": [],
"source": [
"# read signal\n",
"fs, x = wav.read(os.path.join(INPUT_DIR, filename))\n",
"\n",
"x, fs = sf.read(file=os.path.join(INPUT_DIR, filename),dtype=np.float32)\n",
"\n",
"# make monaural if necessary\n",
"x = make_monaural(x)\n",
"\n",
"# convert wav from int16 to float32\n",
"x = pcm_int16_to_float32np(x)\n",
"\n",
"# read corresponding transcription files\n",
"melody_transcription = np.loadtxt(os.path.join(INPUT_DIR, 'runningExample_IGotYouMelody.txt'))\n",
"drums_transcription = np.loadtxt(os.path.join(INPUT_DIR, 'runningExample_IGotYouDrums.txt'))"
Expand Down Expand Up @@ -155,8 +153,8 @@
" # save result\n",
" out_filepath = os.path.join(OUT_DIR,\n",
" 'demoDrumExtractionKAM_NMF_percThreshold_KAM_component_{}_extracted_from_{}'.format(k, filename))\n",
" \n",
" wav.write(filename=out_filepath, rate=fs, data=y)"
"\n",
" sf.write(file=out_filepath, data=y, samplerate=fs)"
]
},
{
Expand Down Expand Up @@ -263,7 +261,7 @@
"\n",
"\n",
"num_comp_drum = drums_H.shape[0]\n",
"drums_W = init_templates(num_bins=num_bins, strategy='drums')"
"drums_W = init_templates(num_bins=num_bins, strategy='drums', num_template_frames=num_template_frames)"
]
},
{
Expand Down Expand Up @@ -382,8 +380,9 @@
" # save result\n",
" out_filepath = os.path.join(OUT_DIR,\n",
" 'demoDrumExtractionKAM_NMF_scoreInformed_NMF_component_{}_extracted_from_{}'.format(k, filename))\n",
" \n",
" wav.write(filename=out_filepath, rate=fs, data=y)\n",
"\n",
" sf.write(file=out_filepath, data=y, samplerate=fs)\n",
"\n",
" audios.append(y)"
]
},
Expand Down Expand Up @@ -457,7 +456,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -471,9 +470,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.11"
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
112 changes: 26 additions & 86 deletions demo_drum_sound_separation_nmf.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"from libnmfd.core.nmfconv import init_activations, init_templates, nmfd\n",
"from libnmfd.dsp.filters import alpha_wiener_filter\n",
"from libnmfd.dsp.transforms import forward_stft, inverse_stft\n",
"from libnmfd.utils import make_monaural, pcm_int16_to_float32np\n",
"from libnmfd.utils import make_monaural #, pcm_int16_to_float32np\n",
"from libnmfd.utils.core_utils import visualize_components_nmf\n",
"\n",
"INPUT_DIR = 'data/'\n",
Expand Down Expand Up @@ -61,88 +61,6 @@
"x = make_monaural(x)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"desired_drum_classes = ['kick', 'snare', 'hihat']\n",
"\n",
"# set common parameters\n",
"num_comp = 3\n",
"num_iter = 30\n",
"num_template_frames = 8\n",
"\n",
"# spectral parameters\n",
"BLOCK_SIZE = 2048\n",
"HOP_SIZE = 512\n",
"\n",
"counter = 0\n",
"init_W_drums = list()\n",
"\n",
"\n",
"\n",
"for drum_class in desired_drum_classes:\n",
" # parse all audio files\n",
" \n",
" drum_audio_files = os.listdir(os.path.join(INPUT_DIR, drum_class))\n",
" print(drum_audio_files)\n",
"\n",
" drum_class_audios = None\n",
" \n",
" for drum_audio_file in drum_audio_files:\n",
" dx, fs = sf.read(file=os.path.join(INPUT_DIR, drum_class, drum_audio_file),dtype=np.float32)\n",
"\n",
" # make monaural if necessary\n",
" dx = make_monaural(dx)\n",
"\n",
" # normalize amplitude\n",
" dx = 0.99 * dx / np.max(np.abs(dx))\n",
"\n",
" # concatenate all audios for one drum class\n",
" if drum_class_audios is None:\n",
" drum_class_audios = dx.copy()\n",
" else:\n",
" drum_class_audios = np.hstack([drum_class_audios, dx])\n",
"\n",
"\n",
" \n",
" # STFT computation\n",
" _, A, _ = forward_stft(drum_class_audios, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, reconst_mirror=True, append_frames=True)\n",
"\n",
" # get dimensions and time and freq resolutions\n",
" num_bins, num_frames = A.shape\n",
" time_res = HOP_SIZE / fs\n",
" freq_res = fs / BLOCK_SIZE \n",
"\n",
" # generate initial guess for templates\n",
" init_W = init_templates(num_comp=1,\n",
" num_bins=num_bins,\n",
" strategy='random')\n",
" \n",
" # generate initial activations\n",
" init_H = init_activations(num_comp=1,\n",
" num_frames=num_frames,\n",
" strategy='uniform') \n",
"\n",
" # NMFD core method\n",
" nmfd_W, nmfd_H, nmfd_V, divKL, _ = nmfd(V=A, \n",
" num_comp=1, \n",
" num_frames=num_frames, \n",
" num_iter=num_iter,\n",
" num_template_frames=num_template_frames,\n",
" init_W=init_W,\n",
" init_H=init_H)\n",
"\n",
"\n",
" init_W_drums.append(np.array(nmfd_W).squeeze(0).copy())\n",
" \n",
"\n",
" \n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -188,8 +106,11 @@
"num_template_frames = 8\n",
"\n",
"# generate initial guess for templates\n",
"init_W = init_W_drums\n",
"\n",
"# generate initial guess for templates\n",
"init_W = init_templates(num_comp=num_comp,\n",
" num_bins=num_bins,\n",
" num_template_frames=num_template_frames,\n",
" strategy='drums')\n",
"\n",
"# generate initial activations\n",
"init_H = init_activations(num_comp=num_comp,\n",
Expand Down Expand Up @@ -310,6 +231,25 @@
"source": [
"ipd.Audio(audios[2].T, rate=fs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Reference: \n",
"[1] Christian Dittmar, Meinard Müller\n",
"<br>**Reverse Engineering the Amen Break — Score-Informed Separation and Restoration Applied to Drum Recordings**\n",
"<br>IEEE/ACM Transactions on Audio, Speech, and Language Processing, 24(9): 1531-1543, 2016.\n",
"<br>\n",
"[2] Christian Dittmar, Patricio López-Serrano, Meinard Müller\n",
"<br>**Unifying Local and Global Methods for Harmonic-Percussive Source Separation**\n",
"<br>In Proceedings of the IEEE International Conference on Acoustics,<br>Speech, and Signal Processing (ICASSP), 2018.\n",
"\n",
"#### If you use libnmfd (NMF Toolbox), please cite\n",
"[3] Patricio López-Serrano, Christian Dittmar, Yiğitcan Özer, and Meinard Müller<br>\n",
"**NMF Toolbox: Music Processing Applications of Nonnegative Matrix Factorization**<br>\n",
"In Proceedings of the International Conference on Digital Audio Effects (DAFx), 2019."
]
}
],
"metadata": {
Expand All @@ -328,7 +268,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
"version": "3.8.19"
}
},
"nbformat": 4,
Expand Down
93 changes: 85 additions & 8 deletions libnmfd/core/nmfconv.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
import numpy as np
import soundfile as sf
from tqdm import tnrange
from typing import List, Tuple, Union

from libnmfd.dsp.filters import nema
from libnmfd.utils import EPS, load_matlab_dict, midi2freq
from libnmfd.utils.core_utils import drum_specific_soft_constraints_nmf

from libnmfd.dsp.transforms import forward_stft
from libnmfd.utils import EPS, midi2freq, make_monaural
#from libnmfd.utils.core_utils import drum_specific_soft_constraints_nmf

def nmf_conv(V:np.ndarray,
num_comp: int = 3,
Expand Down Expand Up @@ -425,6 +427,83 @@ def shift_operator(A: np.ndarray,

return shifted

def initialize_drum_specific_nmfd_templates(desired_drum_classes: List[str] = None,
                                            num_iter: int = 30,
                                            num_template_frames: int = 8,
                                            block_size: int = 2048,
                                            hop_size: int = 512,
                                            fs: int = 44100,
                                            input_dir: str = 'data/') -> List[np.ndarray]:
    """Learn one NMFD spectrogram template per drum class from example recordings.

    For every entry in ``desired_drum_classes`` a folder of the same name is
    expected inside ``input_dir``. All audio files in that folder are read,
    mixed down to mono, brought to the common sampling rate ``fs``,
    peak-normalized and concatenated. A single-component NMFD is then run on
    the magnitude spectrogram of the concatenation and the learned template is
    collected.

    Parameters
    ----------
    desired_drum_classes: list of str, optional
        Names of the drum classes, which are also the sub-folder names inside
        ``input_dir``. Defaults to ``['kick', 'snare', 'hihat']``.
    num_iter: int
        Number of NMFD iterations per drum class.
    num_template_frames: int
        Number of time frames of each learned template.
    block_size: int
        STFT block size in samples.
    hop_size: int
        STFT hop size in samples.
    fs: int
        Target sampling rate in Hz. Files recorded at a different rate are
        resampled to this rate before analysis.
    input_dir: str
        Directory that contains one sub-folder per drum class.

    Returns
    -------
    init_W_drums: list of np.ndarray
        One template per drum class, in the order of ``desired_drum_classes``.
        Each entry is the NMFD template with its leading axis squeezed away
        (presumably of shape (num_bins, num_template_frames) -- TODO confirm
        against ``nmfd``).

    Raises
    ------
    NotADirectoryError
        If the folder of a drum class does not exist.
    FileNotFoundError
        If the folder of a drum class contains no files to read.
    """
    # set some default classes in case of empty user input
    if desired_drum_classes is None:
        desired_drum_classes = ['kick', 'snare', 'hihat']

    # initialize empty list
    init_W_drums = list()

    for drum_class in desired_drum_classes:
        class_dir = os.path.join(input_dir, drum_class)

        # check if folder exists
        if not os.path.isdir(class_dir):
            raise NotADirectoryError(f"The specified folder {class_dir} does not exist.")

        # Collect the audio files. Sorting makes the concatenation order
        # independent of the platform-specific os.listdir order; hidden files
        # (e.g. '.DS_Store') and sub-folders are not audio and are skipped.
        drum_audio_files = sorted(entry for entry in os.listdir(class_dir)
                                  if not entry.startswith('.')
                                  and os.path.isfile(os.path.join(class_dir, entry)))

        # fail early with a clear message instead of passing nothing to the STFT
        if not drum_audio_files:
            raise FileNotFoundError(f"The specified folder {class_dir} does not contain any audio files.")

        class_snippets = list()

        for drum_audio_file in drum_audio_files:
            dx, orig_fs = sf.read(file=os.path.join(class_dir, drum_audio_file), dtype=np.float32)

            # make monaural if necessary
            dx = make_monaural(dx)

            # resample if necessary
            if orig_fs != fs:
                # imported lazily so that scipy is only required when a file
                # actually needs resampling
                from scipy.signal import resample

                # the new length scales with target rate / original rate and
                # has to be an integer number of samples
                num_samples = max(1, int(round(len(dx) * fs / orig_fs)))
                dx = resample(dx, num_samples)

            # normalize amplitude; a completely silent file is left untouched
            # to avoid a division by zero that would fill the signal with NaNs
            peak = np.max(np.abs(dx))
            if peak > 0:
                dx = 0.99 * dx / peak

            class_snippets.append(dx)

        # concatenate all audios for one drum class
        drum_class_audios = np.hstack(class_snippets)

        # STFT computation
        _, A, _ = forward_stft(drum_class_audios,
                               block_size=block_size,
                               hop_size=hop_size,
                               reconst_mirror=True,
                               append_frames=True)

        # get dimensions of the magnitude spectrogram
        num_bins, num_frames = A.shape

        # generate initial guess for templates
        init_W = init_templates(num_comp=1,
                                num_bins=num_bins,
                                strategy='random')

        # generate initial activations
        init_H = init_activations(num_comp=1,
                                  num_frames=num_frames,
                                  strategy='uniform')

        # NMFD core method, only the learned templates are of interest here
        nmfd_W, _, _, _, _ = nmfd(V=A,
                                  num_comp=1,
                                  num_frames=num_frames,
                                  num_iter=num_iter,
                                  num_template_frames=num_template_frames,
                                  init_W=init_W,
                                  init_H=init_H)

        # nmfd returns one template per component; drop the singleton
        # component axis so that each list entry is a single template
        init_W_drums.append(np.array(nmfd_W).squeeze(0).copy())

    return init_W_drums

def init_templates(num_comp: int = None,
num_bins: int = None,
Expand Down Expand Up @@ -522,12 +601,10 @@ def init_templates(num_comp: int = None,
init_W[k][bin_range, :] = 1/(g+1)

elif strategy == 'drums':
dict_W = load_matlab_dict('data/dictW.mat', 'dictW')

if num_bins == dict_W.shape[0]:
for k in range(dict_W.shape[1]):
init_W.append(dict_W[:, k].reshape(-1, 1) * np.linspace(1, 0.1, num_template_frames))

# call sub-routine that extracts the NMFD templates for drums
init_W = initialize_drum_specific_nmfd_templates(num_template_frames=num_template_frames)

# needs to be overwritten
num_comp = len(init_W)

Expand Down

0 comments on commit 9b2502f

Please sign in to comment.