diff --git a/README.md b/README.md index 30d3a55..b427f25 100644 --- a/README.md +++ b/README.md @@ -22,22 +22,25 @@ See [https://dcase-models.readthedocs.io](https://dcase-models.readthedocs.io/en ## Installation instructions We recommend to install DCASE-models in a dedicated virtual environment. For instance, using [anaconda](https://www.anaconda.com/): ``` -conda create -n dcase python=3.6 +conda create -n dcase python=3.10 conda activate dcase ``` For GPU support: ``` -conda install cudatoolkit cudnn +conda install cudatoolkit=11.3 cudnn +conda install cupti=11 +conda install -c nvidia cuda-nvcc +conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib ``` DCASE-models uses [SoX](http://sox.sourceforge.net/) for functions related to the datasets. You can install it in your conda environment by: ``` conda install -c conda-forge sox ``` -When installing the library, you must select the tensorflow variant: version 1 (CPU-only or GPU) or version 2. +When installing the library, you must select the tensorflow variant: version 1 (CPU-only or GPU) or version 2. 
Note that you can specify a particular release of DCASE-models (you may need to pin the installation to one of the latest releases if the PyPI version is not up to date) ``` -pip install DCASE-models[keras_tf] # for tensorflow 1 CPU-only version -pip install DCASE-models[keras_tf_gpu] # for tensorflow 1 GPU version -pip install DCASE-models[tf2] # for tensorflow 2 +pip install DCASE-models[keras_tf]==v0.2.0-rc2 # for tensorflow 1 CPU-only version +pip install DCASE-models[keras_tf_gpu]==v0.2.0-rc2 # for tensorflow 1 GPU version +pip install DCASE-models[tf2]==v0.2.0-rc2 # for tensorflow 2 ``` To include visualization related dependencies, run the following instead: diff --git a/dcase_models/data/feature_extractor.py b/dcase_models/data/feature_extractor.py index 6a6b3b1..4fc0660 100644 --- a/dcase_models/data/feature_extractor.py +++ b/dcase_models/data/feature_extractor.py @@ -209,9 +209,7 @@ def extract(self, dataset): path_to_features_file = path_audio.replace( audio_path, features_path ) - path_to_features_file = path_to_features_file.replace( - 'wav', 'npy' - ) + path_to_features_file = os.path.splitext(path_to_features_file)[0] + '.npy' np.save(path_to_features_file, features_array) # Save parameters.json for future checking @@ -345,7 +343,7 @@ def pad_audio(self, audio): sequence_hop_samples = self.sequence_hop*self.audio_hop if len(audio) < sequence_samples: audio = librosa.util.fix_length( - audio, sequence_samples, axis=0, mode=self.pad_mode) + audio, size=sequence_samples, axis=0, mode=self.pad_mode) else: if self.sequence_hop_time > 0: audio_frames = int((len(audio) - self.audio_win) / self.audio_hop) + int(((len(audio) - self.audio_win) % self.audio_hop)>0) @@ -354,7 +352,7 @@ def pad_audio(self, audio): new_samples = new_frames * self.audio_hop + self.audio_win audio = librosa.util.fix_length( audio, - new_samples, + size=new_samples, axis=0, mode=self.pad_mode ) else: @@ -367,8 +365,8 @@ def convert_to_sequences(self, audio_representation): 
audio_representation = np.ascontiguousarray(audio_representation) audio_representation = librosa.util.frame( audio_representation, - self.sequence_frames, - self.sequence_hop, + frame_length=self.sequence_frames, + hop_length=self.sequence_hop, axis=0 ) else: diff --git a/dcase_models/util/metrics.py b/dcase_models/util/metrics.py index ae91ff5..3ba195f 100644 --- a/dcase_models/util/metrics.py +++ b/dcase_models/util/metrics.py @@ -87,7 +87,7 @@ def evaluate_metrics(model, data, metrics, **kwargs): n_files = len(X_val) for i in range(n_files): X = X_val[i] - Y_predicted = model.predict(X) + Y_predicted = model.predict(X, verbose=False) # if multiple outputs, select the first if type(Y_predicted) == list: Y_predicted = Y_predicted[0] @@ -102,7 +102,7 @@ def evaluate_metrics(model, data, metrics, **kwargs): n_files = len(X_val) for i in range(n_files): X = X_val[i] - Y_predicted = model.predict(X) + Y_predicted = model.predict(X, verbose=False) if type(Y_predicted) == list: Y_predicted = Y_predicted[0] predictions.append(Y_predicted)