-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathlstm_chord_classification_application.py
89 lines (67 loc) · 2.6 KB
/
lstm_chord_classification_application.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from keras.models import model_from_json
import math
import numpy as np
import os
import pandas as pd
import seaborn as sns
from sklearn.metrics import hamming_loss, accuracy_score, roc_auc_score
import tfr
data_dir = '../data'

# --- load model ---
# The architecture (JSON) and the weights (HDF5) are stored side by side in a
# directory named after the model id.
model_id = 'model_2017-11-19-19-41-34'
model_dir = data_dir + '/beatles/models/' + model_id
model_arch = '%s/%s_arch.json' % (model_dir, model_id)
model_weights = '%s/%s_weights.h5' % (model_dir, model_id)

print('loading model:', model_arch)
# Use a context manager so the architecture file is closed deterministically
# instead of leaking the handle until garbage collection.
with open(model_arch) as arch_file:
    model = model_from_json(arch_file.read())

print('loading model weights:', model_weights)
model.load_weights(model_weights)
model.compile(loss='binary_crossentropy', optimizer='adam')

# --- load data ---
song = "05_-_And_I_Love_Her"
audio_file = data_dir + '/beatles/' + song + '.wav'
# Framing parameters; they are also used later to build the output path.
block_size = 4096
hop_size = 2048

print('loading audio:', audio_file)
signal_frames = tfr.SignalFrames(audio_file, frame_size=block_size, hop_size=hop_size)

print('computing chromagram')
# normalized to [0.; 1.]
X_orig = tfr.pitchgram(signal_frames, magnitudes='power_db_normalized')

### Features

print(X_orig.shape)
# The unpacking requires X_orig to be 2-D: (frame_count, feature_count).
frame_count, feature_count = X_orig.shape
# Number of output labels per frame
# (presumably the 12 pitch classes - confirm against the trained model).
target_count = 12

# we'll cut the datasets into small sequences of frames
max_seq_size = 100
def pad_sequences(a, max_seq_size):
    """
    Cuts the list of frames into fixed-length sequences.
    The end is padded with zeros if needed.

    (frame_count, feature_count) -> (seq_count, max_seq_size, feature_count)

    More generally, any trailing per-frame shape is preserved:
    (frame_count, ...) -> (seq_count, max_seq_size, ...)

    Args:
        a: array-like of frames; the first axis indexes frames.
        max_seq_size: positive integer length of each output sequence.

    Returns:
        A new array with the dtype of `a`, where
        seq_count = ceil(frame_count / max_seq_size). Frames keep their
        original order and the tail of the last sequence is zero-filled.

    Raises:
        ValueError: if `max_seq_size` is not positive or `a` has no frame axis.
    """
    if max_seq_size <= 0:
        raise ValueError('max_seq_size must be positive, got %r' % (max_seq_size,))
    a = np.asarray(a)
    if a.ndim == 0:
        raise ValueError('expected an array with at least one (frame) axis')

    n = a.shape[0]
    frame_shape = a.shape[1:]
    # Ceiling division in pure integer arithmetic (no float rounding).
    seq_count = -(-n // max_seq_size)

    a_padded = np.zeros((seq_count * max_seq_size,) + frame_shape, a.dtype)
    a_padded[:n] = a
    # An explicit sequence count (rather than -1) keeps the reshape
    # unambiguous even when a frame has zero features.
    return a_padded.reshape((seq_count, max_seq_size) + frame_shape)
# Split the per-frame features into fixed-length, zero-padded sequences.
X_seq = pad_sequences(X_orig, max_seq_size)
print(X_seq.shape)

# Append a trailing axis of size 1 (a single input channel for convolution).
conv_shape = (X_seq.shape[0], max_seq_size, feature_count, 1)
X_seq_conv = X_seq.reshape(conv_shape)
print(X_seq_conv.shape)

X = X_seq_conv

# --- prediction ---

batch_size = 32

# Predict per-frame label probabilities.
# input shape: (sequence_count, sequence_size, feature_count, 1)
# output shape: (sequence_count, sequence_size, target_count)
Y_pred_proba = model.predict(X, batch_size=batch_size, verbose=1)

# Turn probabilities into binary classes with a 0.5 cut-off.
Y_pred = np.greater_equal(Y_pred_proba, 0.5).astype(np.int32)

# Flatten the sequences back into one row per frame, then drop the rows
# that exist only because of the zero padding.
Y_pred = Y_pred.reshape(-1, target_count)
Y_pred = Y_pred[:frame_count]

# Write the labels as tab-separated integers; create the output directory
# first if it does not exist yet.
pred_file = data_dir + '/beatles/chord-pcs-predicted/%d_%d/%s/%s.tsv' % (
    block_size, hop_size, model_id, song)
pred_dir = os.path.dirname(pred_file)
os.makedirs(pred_dir, exist_ok=True)
np.savetxt(pred_file, Y_pred, fmt='%d', delimiter='\t')
print('saved predicted labels to: ', pred_file)