Merge pull request #3 from groupmm/feat_dmr

Feat dmr
groupmm · Aug 9, 2024 · 3831eb7 · 3831eb7
2 parents 335d994 + c05ceb9
commit 3831eb7
Show file tree

Hide file tree

Showing 187 changed files with 4,370 additions and 19,898 deletions.
diff --git a/LICENCE → LICENSE b/LICENCE → LICENSE
diff --git a/data/hihat/funky001.wav b/data/hihat/funky001.wav
diff --git a/data/hihat/pitched006.wav b/data/hihat/pitched006.wav
diff --git a/data/hihat/pres_002.wav b/data/hihat/pres_002.wav
diff --git a/data/hihat/sabian_003.wav b/data/hihat/sabian_003.wav
diff --git a/data/hihat/tama_036.wav b/data/hihat/tama_036.wav
diff --git a/data/kick/electro008.wav b/data/kick/electro008.wav
diff --git a/data/kick/pitched005.wav b/data/kick/pitched005.wav
diff --git a/data/kick/rock005.wav b/data/kick/rock005.wav
diff --git a/data/kick/rock007.wav b/data/kick/rock007.wav
diff --git a/data/kick/sput001.wav b/data/kick/sput001.wav
diff --git a/data/snare/cheeba_002.wav b/data/snare/cheeba_002.wav
diff --git a/data/snare/metal0010.wav b/data/snare/metal0010.wav
diff --git a/data/snare/princes003.wav b/data/snare/princes003.wav
diff --git a/data/snare/rock018.wav b/data/snare/rock018.wav
diff --git a/data/snare/sonor_034.wav b/data/snare/sonor_034.wav
diff --git a/demo_audio_mosaicing_continuity.ipynb b/demo_audio_mosaicing_continuity.ipynb
@@ -4,8 +4,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Date: Jun 2019 *(review in March 2024)*<br>Programmer: Christian Dittmar, Yiğitcan Özer\n",
-    "This is the demo script which illustrates the main functionalities of the 'NMF toolbox'. For a detailed description we refer to [1,2] (see References below).\n",
+    "#### Date: Jun 2019 *(review in August 2024)*<br>Programmer: Christian Dittmar, Yiğitcan Özer\n",
+    "<br>This is the demo script which illustrates the main functionalities of the 'NMF toolbox'. For a detailed description we refer to [1,2] (see References below).\n",
     "\n",
     "#### The script proceeds in the following steps:\n",
     "<br>1. It loads a target audio file containing the intro of the song \"Let it be\", by \"The Beatles\".\n",
@@ -31,13 +31,13 @@
    "source": [
     "import os\n",
     "import numpy as np\n",
-    "import scipy.io.wavfile as wav\n",
+    "import soundfile as sf\n",
     "import IPython.display as ipd\n",
     "\n",
     "from libnmfd.core import nmf_diag\n",
     "from libnmfd.dsp.algorithms import griffin_lim\n",
     "from libnmfd.dsp.transforms import forward_stft\n",
-    "from libnmfd.utils import EPS, MAX_WAV_VALUE, make_monaural, pcm_int16_to_float32np\n",
+    "from libnmfd.utils import EPS, MAX_WAV_VALUE, make_monaural\n",
     "from libnmfd.utils.core_utils import visualize_components_nmf"
    ]
   },
@@ -72,16 +72,12 @@
    "outputs": [],
    "source": [
     "# read signals\n",
-    "fs, xs = wav.read(os.path.join(INPUT_DIR, filename_source))\n",
-    "fs, xt = wav.read(os.path.join(INPUT_DIR, filename_target))\n",
+    "xs, fs = sf.read(file=os.path.join(INPUT_DIR, filename_source),dtype=np.float32)\n",
+    "xt, fs = sf.read(file=os.path.join(INPUT_DIR, filename_target),dtype=np.float32)\n",
     "\n",
     "# make monaural if necessary\n",
-    "make_monaural(xs)\n",
-    "make_monaural(xt)\n",
-    "\n",
-    "# convert wavs from int16 to float32\n",
-    "xs = pcm_int16_to_float32np(xs)\n",
-    "xt = pcm_int16_to_float32np(xt)"
+    "xs = make_monaural(xs)\n",
+    "xt = make_monaural(xt)"
    ]
   },
   {
@@ -131,16 +127,6 @@
     "W0 = As * 1./ (EPS + np.sum(As, axis=0))\n",
     "Xs = Xs * 1./ (EPS + np.sum(As, axis=0))\n",
     "\n",
-    "# parameters taken from Jonathan Driedger's toolbox\n",
-    "paramNMFdiag = dict()\n",
-    "paramNMFdiag['fixW'] = True\n",
-    "paramNMFdiag['numOfIter'] = 20\n",
-    "paramNMFdiag['continuity'] = dict()\n",
-    "paramNMFdiag['continuity']['polyphony'] = 10\n",
-    "paramNMFdiag['continuity']['length'] = 7\n",
-    "paramNMFdiag['continuity']['grid'] = 5\n",
-    "paramNMFdiag['continuity']['sparsen'] = [1, 7]\n",
-    "\n",
     "# call the reference implementation as provided by Jonathan Driedger\n",
     "nmfdiagW, nmfdiagH = nmf_diag(V=At, \n",
     "                              num_iter=30,\n",
@@ -163,7 +149,12 @@
    "outputs": [],
    "source": [
     "# visualize\n",
-    "fh1, _ = visualize_components_nmf(V=At, W=nmfdiagW, H=nmfdiagH, comp_V=None, freq_res=freq_res, time_res=time_res,\n",
+    "fh1, _ = visualize_components_nmf(V=At, \n",
+    "                                  W=nmfdiagW, \n",
+    "                                  H=nmfdiagH, \n",
+    "                                  comp_V=None, \n",
+    "                                  freq_res=freq_res, \n",
+    "                                  time_res=time_res,\n",
     "                                  font_size=14)\n",
     "fh1.savefig(os.path.join(OUT_DIR, 'LetItBee_NMFdiag.png'))"
    ]
@@ -178,9 +169,9 @@
     "_, _, res = griffin_lim(X=contY, block_size=BLOCK_SIZE, hop_size=HOP_SIZE, )\n",
     "\n",
     "# save result\n",
-    "wav.write(filename=os.path.join(OUT_DIR, 'LetItBee_NMFdiag_with_target_' + filename_target),\n",
-    "          rate=fs,\n",
-    "          data=res*MAX_WAV_VALUE)"
+    "sf.write(file=os.path.join(OUT_DIR, 'LetItBee_NMFdiag_with_target_' + filename_target),\n",
+    "         samplerate=fs,\n",
+    "         data=res*MAX_WAV_VALUE)"
    ]
   },
   {
@@ -252,7 +243,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -266,9 +257,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.11"
+   "version": "3.8.19"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/demo_drum_extraction_kam_nmf_perc_threshold.ipynb b/demo_drum_extraction_kam_nmf_perc_threshold.ipynb
@@ -4,10 +4,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Date: Jun 2019 (*Review: March 2024*)\n",
-    "\n",
-    "<br>Programmer: Christian Dittmar, Yiğitcan Özer\n",
-    "This is the demo script which illustrates the main functionalities of the 'NMF toolbox'. For a detailed description we refer to [1,2] (see References below).\n",
+    "#### Date: Jun 2019 (*Review: August 2024*)<br>Programmer: Christian Dittmar, Yiğitcan Özer\n",
+    "<br>This is the demo script which illustrates the main functionalities of the 'NMF toolbox'. For a detailed description we refer to [1,2] (see References below).\n",
     "\n",
     "#### The script proceeds in the following steps:\n",
     "<br>1. It loads an example audio file containing drums and melodic instruments\n",
@@ -32,16 +30,16 @@
    "source": [
     "import os\n",
     "import numpy as np\n",
-    "import scipy.io.wavfile as wav\n",
+    "import soundfile as sf\n",
     "import IPython.display as ipd\n",
     "\n",
-    "from libnmfd.core.nmfconv import conv_model, drum_specific_soft_constraints_nmf, \\\n",
-    "    init_activations, init_templates, nmfd\n",
+    "from libnmfd.core.nmfconv import conv_model, init_activations, init_templates, nmfd\n",
     "from libnmfd.dsp.algorithms import hpss_kam_fitzgerald\n",
     "from libnmfd.dsp.filters import alpha_wiener_filter\n",
     "from libnmfd.dsp.transforms import forward_stft, inverse_stft, log_freq_log_mag\n",
-    "from libnmfd.utils import make_monaural, pcm_int16_to_float32np\n",
-    "from libnmfd.utils.core_utils import percussiveness_estimation, visualize_components_kam, visualize_components_nmf\n",
+    "from libnmfd.utils import make_monaural\n",
+    "from libnmfd.utils.core_utils import drum_specific_soft_constraints_nmf, \\\n",
+    "    percussiveness_estimation, visualize_components_kam, visualize_components_nmf\n",
     "\n",
     "INPUT_DIR = 'data/'\n",
     "OUT_DIR = 'output/'\n",
@@ -67,13 +65,10 @@
    "outputs": [],
    "source": [
     "# read signal\n",
-    "fs, x = wav.read(os.path.join(INPUT_DIR, filename))\n",
+    "x, fs = sf.read(os.path.join(INPUT_DIR, filename))\n",
     "\n",
     "# make monaural if necessary\n",
-    "x = make_monaural(x)\n",
-    "\n",
-    "# convert wav from int16 to float32\n",
-    "x = pcm_int16_to_float32np(x)"
+    "x = make_monaural(x)"
    ]
   },
   {
@@ -150,7 +145,7 @@
     "    out_filepath = os.path.join(OUT_DIR,\n",
     "                                'demoDrumExtractionKAM_NMF_percThreshold_KAM_component_{}_extracted_from_{}'.format(k, filename))\n",
     "    \n",
-    "    wav.write(filename=out_filepath, rate=fs, data=y)"
+    "    sf.write(file=out_filepath, samplerate=fs, data=y)"
    ]
   },
   {
@@ -310,7 +305,7 @@
    "source": [
     "# create reduced version of templates for visualization\n",
     "nmfdW_vis = list()\n",
-    "for nmfdW_curr in nmfdW:\n",
+    "for nmfdW_curr in nmfd_W:\n",
     "    nmfdW_curr = accu_mat @ nmfdW_curr\n",
     "    nmfdW_vis.append(nmfdW_curr)\n",
     "\n",
@@ -349,7 +344,7 @@
     "    out_filepath = os.path.join(OUT_DIR,\n",
     "                                'demoDrumExtractionKAM_NMF_percThreshold_NMF_component_{}_extracted_from_{}'.format(k, filename))\n",
     "    \n",
-    "    wav.write(filename=out_filepath, rate=fs, data=y)"
+    "    sf.write(file=out_filepath, samplerate=fs, data=y)"
    ]
   },
   {
@@ -422,7 +417,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -436,9 +431,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.11"
+   "version": "3.8.19"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/demo_drum_extraction_kam_nmf_score_informed.ipynb b/demo_drum_extraction_kam_nmf_score_informed.ipynb
@@ -4,11 +4,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Date: Jun 2019 (*Review: March 2024*)\n",
-    "\n",
-    "<br>Programmer: Christian Dittmar, Yiğitcan Özer\n",
-    "\n",
-    "This is the demo script which illustrates the main functionalities of the 'NMF toolbox'. For a detailed description we refer to [1,2] (see References below).\n",
+    "#### Date: Jun 2019 (*Review: August 2024*)<br>Programmer: Christian Dittmar, Yiğitcan Özer\n",
+    "<br>This is the demo script which illustrates the main functionalities of the 'NMF toolbox'. For a detailed description we refer to [1,2] (see References below).\n",
     "\n",
     "#### The notebook proceeds in the following steps:\n",
     "<br>1. It loads an example audio file containing drums and melodic instruments\n",
@@ -33,17 +30,16 @@
    "source": [
     "import os\n",
     "import numpy as np\n",
-    "import scipy.io.wavfile as wav\n",
+    "import soundfile as sf\n",
     "import IPython.display as ipd\n",
     "\n",
-    "from libnmfd.core.nmfconv import conv_model, drum_specific_soft_constraints_nmf, \\\n",
-    "    init_activations, init_templates, nmfd\n",
+    "from libnmfd.core.nmfconv import conv_model, init_activations, init_templates, nmfd\n",
     "from libnmfd.dsp.algorithms import hpss_kam_fitzgerald\n",
     "from libnmfd.dsp.filters import alpha_wiener_filter\n",
     "from libnmfd.dsp.transforms import forward_stft, inverse_stft, log_freq_log_mag\n",
     "from libnmfd.utils import make_monaural, pcm_int16_to_float32np\n",
     "from libnmfd.utils.core_utils import percussiveness_estimation, visualize_components_kam, visualize_components_nmf\n",
-    "\n",
+    "from libnmfd.utils.core_utils import drum_specific_soft_constraints_nmf\n",
     "\n",
     "INPUT_DIR = 'data/'\n",
     "OUT_DIR = 'output/'\n",
@@ -69,14 +65,12 @@
    "outputs": [],
    "source": [
     "# read signal\n",
-    "fs, x = wav.read(os.path.join(INPUT_DIR, filename))\n",
+    "\n",
+    "x, fs = sf.read(file=os.path.join(INPUT_DIR, filename),dtype=np.float32)\n",
     "\n",
     "# make monaural if necessary\n",
     "x = make_monaural(x)\n",
     "\n",
-    "# convert wav from int16 to float32\n",
-    "x = pcm_int16_to_float32np(x)\n",
-    "\n",
     "# read corresponding transcription files\n",
     "melody_transcription = np.loadtxt(os.path.join(INPUT_DIR, 'runningExample_IGotYouMelody.txt'))\n",
     "drums_transcription = np.loadtxt(os.path.join(INPUT_DIR, 'runningExample_IGotYouDrums.txt'))"
@@ -155,8 +149,8 @@
     "    # save result\n",
     "    out_filepath = os.path.join(OUT_DIR,\n",
     "                                'demoDrumExtractionKAM_NMF_percThreshold_KAM_component_{}_extracted_from_{}'.format(k, filename))\n",
-    "    \n",
-    "    wav.write(filename=out_filepath, rate=fs, data=y)"
+    "\n",
+    "    sf.write(file=out_filepath, data=y, samplerate=fs)"
    ]
   },
   {
@@ -241,7 +235,8 @@
     "# generate score-informed templates for the melodic part\n",
     "pitched_W = init_templates(num_bins=num_bins,\n",
     "                           num_template_frames=num_template_frames,\n",
-    "                           freq_res=freq_res,\n",
+    "                           fs=fs,\n",
+    "                           block_size=BLOCK_SIZE,\n",
     "                           pitches=melody_transcription[:, 1], \n",
     "                           strategy='pitched')\n",
     "\n",
@@ -263,7 +258,7 @@
     "\n",
     "\n",
     "num_comp_drum = drums_H.shape[0]\n",
-    "drums_W = init_templates(num_bins=num_bins, strategy='drums')"
+    "drums_W = init_templates(num_bins=num_bins, strategy='drums', num_template_frames=num_template_frames)"
    ]
   },
   {
@@ -382,8 +377,9 @@
     "    # save result\n",
     "    out_filepath = os.path.join(OUT_DIR,\n",
     "                                'demoDrumExtractionKAM_NMF_scoreInformed_NMF_component_{}_extracted_from_{}'.format(k, filename))\n",
-    "    \n",
-    "    wav.write(filename=out_filepath, rate=fs, data=y)\n",
+    "\n",
+    "    sf.write(file=out_filepath, data=y, samplerate=fs)\n",
+    "\n",
     "    audios.append(y)"
    ]
   },
@@ -457,7 +453,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -471,9 +467,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.11"
+   "version": "3.8.19"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }