-
Notifications
You must be signed in to change notification settings - Fork 0
/
beatChromaLuma.py
228 lines (188 loc) · 8.65 KB
/
beatChromaLuma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
'''
Compute beat-synchronous chroma-luma matrices
'''
# <codecell>
import librosa
import numpy as np
import chromaLuma
import scipy.signal
# <codecell>
def onset_strength_median(y=None, sr=22050, S=None, **kwargs):
    """Onset strength envelope, aggregated across mel bands with a median

    Arguments:
      y         -- (ndarray) audio time-series          | default: None
      sr        -- (int)     sampling rate of y         | default: 22050
      S         -- (ndarray) pre-computed spectrogram   | default: None

      **kwargs  -- Passed to librosa.feature.melspectrogram() when S is
                   not provided

    Note: if S is provided, then (y, sr) are not used.

    Returns onsets:
      onsets    -- (ndarray) vector of onset strength; it has one entry
                   fewer than S has columns (first difference along axis 1)

    Raises:
      ValueError -- if neither y nor S is provided
    """
    if S is None:
        if y is None:
            raise ValueError('One of "S" or "y" must be provided.')

        # NOTE(review): the dB conversion is assumed to apply only to a
        # spectrogram computed here; a caller-supplied S is used as given.
        # Confirm against the intended contract.
        melPower = librosa.feature.melspectrogram(y, sr = sr, **kwargs)
        S = librosa.logamplitude(melPower)

    # Frame-to-frame change in every band, keeping rising energy only.
    # (Falling edges could be kept too by using abs() instead of the clamp.)
    rising = np.maximum(0.0, np.diff(S, n=1, axis=1))

    # Collapse the bands with a median, giving one value per frame step
    return np.median(rising, axis=0)
# <codecell>
def getTuningOffset( spectrogram, fs ):
    '''
    Given a spectrogram of a song, compute its tuning offset in semitones

    For every frame, spectral peaks are located, refined with parabolic
    interpolation and converted to fractional MIDI note numbers.  The
    deviations of those notes from the nearest integer note are pooled over
    all frames, and the center of the fullest of 100 histogram bins is
    returned.

    Input:
        spectrogram - Magnitude STFT of a song, size nBins x nFrames
        fs - sampling rate of the song
    Output:
        tuningOffset - tuning offset in semitones of this song.  0.0 is
            returned when no usable spectral peak is found (e.g. silence or
            an empty spectrogram) instead of an arbitrary histogram bin.
    '''
    if spectrogram.size == 0:
        return 0.0
    nBins = spectrogram.shape[0]
    # Deviations are collected per frame and joined once at the end
    frameSemiDiffs = []
    for X in spectrogram.T:
        # Ignore bins below 10% of this frame's maximum
        Xc = X*(X > .1*np.max( X ))
        # Strict local maxima; the first and last bin can never qualify, so
        # the neighbours used for interpolation below always exist
        localMax = np.flatnonzero( np.logical_and( Xc[1:-1] > Xc[:-2], Xc[1:-1] > Xc[2:] ) ) + 1
        if localMax.size == 0:
            # Nothing to measure in this frame
            continue
        # Parabolic interpolation around each peak (uses the unthresholded
        # magnitudes, as the peak is strictly larger than both neighbours
        # the denominator is never zero)
        alpha = X[localMax - 1]
        beta = X[localMax]
        gamma = X[localMax + 1]
        peakBins = localMax + .5*(alpha - gamma)/(alpha - 2*beta + gamma)
        # Fractional bin index -> frequency in Hz
        peakFreqs = peakBins*fs/(2.0*(nBins - 1))
        # Frequency in Hz -> fractional MIDI note number
        peakNotes = librosa.feature.hz_to_midi( peakFreqs )
        # Throw out values outside of musical range
        peakNotes = peakNotes[np.logical_and( peakNotes >= 24, peakNotes < 108 )]
        # Deviation from the nearest integer note
        frameSemiDiffs.append( peakNotes - np.round( peakNotes ) )
    if frameSemiDiffs:
        semiDiffs = np.concatenate( frameSemiDiffs )
    else:
        semiDiffs = np.zeros( 0 )
    if semiDiffs.size == 0:
        # np.histogram on empty data would fall back to the range [0, 1] and
        # report a meaningless offset
        return 0.0
    counts, bins = np.histogram( semiDiffs, 100 )
    bestBin = np.argmax( counts )
    return (bins[bestBin] + bins[bestBin + 1])/2.0
# <codecell>
def beatChromaLuma( filename, **kwargs ):
    '''
    Given a file, get the beat-synchronous chroma-luma matrices

    The audio is loaded at 22050 Hz, beats are tracked on a median-aggregated
    onset envelope, the harmonic part of the signal is re-synthesized (harmonic
    magnitudes combined with the original STFT phase), and one log-frequency
    spectrum is computed for the audio between each pair of consecutive beats.

    Input:
        filename - full path to file to process
        **kwargs - forwarded unchanged to chromaLuma.logFrequencySpectrum.
            Only binsPerOctave and nOctaves are read here (to size the output);
            the remaining defaults listed below are those documented for that
            function and are not visible in this file - verify there.
        minNote - minimum note number to consider, default 35.5
        binsPerOctave - number of magnitude values to compute per octave, default 48
        nOctaves - number of octaves, default 4
        smoothingWindow - window to use to smooth the spectrum, None = don't smooth, default np.hanning( binsPerOctave )
        smoothingPower - power to raise spectral envelope to, default 3.0, ignored if smoothingWindow=None
        aWeight - whether or not to a-weight the spectrum, default False
        takeLog - whether or not to take a log, default True
    Output:
        beatTimes - vector of beat locations, in seconds, size nBeats
        semitrums - matrix of per-beat semitrums, size nBeats x binsPerOctave*nOctaves.
            Row n covers the audio from beat n to beat n+1; the last beat has
            no closing beat, so the last row is left as zeros.

    The tuning offset is not part of the return value; use getTuningOffset on
    a magnitude spectrogram if it is needed.
    '''
    binsPerOctave = kwargs.get( 'binsPerOctave', 48 )
    nOctaves = kwargs.get( 'nOctaves', 4 )
    # Read in audio data
    audioData, fs = librosa.load( filename, sr=22050 )
    hop = 64
    frameSize = 2048
    # Hop lengths must be integers; floor division keeps this an int on
    # Python 3 as well (plain "/" would give 512.0 there)
    stftHop = frameSize//4
    # Get beat locations - using modified median filter version for the onset envelope
    onsetEnvelope = onset_strength_median( audioData, fs, hop_length=hop, n_fft=frameSize, n_mels=128 )
    _, beats = librosa.beat.beat_track( sr=fs, onsets=onsetEnvelope, hop_length=hop, n_fft=frameSize )
    # Convert beat locations (onset-envelope frames) to samples
    beatSamples = beats*hop
    # Get harmonic component of signal
    spectrogram = librosa.stft( audioData, n_fft=frameSize, hop_length=stftHop )
    harmonicMagnitude, _ = librosa.hpss.hpss_median( np.abs( spectrogram ), win_P=13, win_H=13, p=4 )
    # Re-attach the original phase so the harmonic part can be inverted
    harmonicSpectrogram = harmonicMagnitude*np.exp( 1j*np.angle( spectrogram ) )
    harmonicData = librosa.istft( harmonicSpectrogram, n_fft=frameSize, hop_length=stftHop )
    # Compute a chroma-luma matrix for each beat
    semitrums = np.zeros( (beats.shape[0], nOctaves*binsPerOctave) )
    for n, (beatStart, beatEnd) in enumerate( zip( beatSamples[:-1], beatSamples[1:] ) ):
        # Grab audio samples within this beat
        beatData = harmonicData[beatStart:beatEnd]
        semitrums[n] = chromaLuma.logFrequencySpectrum( beatData, fs, **kwargs )
    return librosa.frames_to_time( beats, fs, hop ), semitrums
# <codecell>
def fakeSemigram( labelsFile, binsPerOctave, nOctaves, intervalsFile="Training_Scripts/dict_minmaj.p" ):
    '''
    Given annotations, generates a synthetic semigram.

    Every label is looked up in a label -> semitone-intervals table; all bins
    belonging to those semitones are switched on, the octave is repeated
    nOctaves times and the result is shifted down by half a semitone.

    Input:
        labelsFile - chord label file of the song in question (.npy, one label per column of the output)
        binsPerOctave - number of semigram bins in each octave; expected to be
            a multiple of 12 (bins per semitone is binsPerOctave//12)
        nOctaves - number of octaves to compute
        intervalsFile - pickle whose element [1] maps each label to an iterable
            of semitone indices, default "Training_Scripts/dict_minmaj.p".
            Pickles can execute code when loaded - only pass trusted files.
    Output:
        fakeSemigram - a synthetic boolean semigram matrix, size binsPerOctave*nOctaves x nLabels
    '''
    import pickle
    # Pickle data is binary; text mode fails on Python 3
    with open( intervalsFile, "rb" ) as f:
        labelToIntervals = pickle.load( f )[1]
    labels = np.load( labelsFile )
    # Derived from binsPerOctave so that resolutions other than 48 work too
    binsPerSemi = binsPerOctave//12
    baseOctave = np.zeros( (binsPerOctave, labels.shape[0]), dtype=bool )
    for n, label in enumerate( labels ):
        for semi in labelToIntervals[label]:
            # All bins of this semitone
            semis = np.arange( semi*binsPerSemi, (semi + 1)*binsPerSemi )
            baseOctave[semis, n] = True
    octaves = np.tile( baseOctave, (nOctaves, 1) )
    # Shift down by half a semitone (an integer number of bins, as np.roll requires)
    octaves = np.roll( octaves, -(binsPerSemi//2), axis=0 )
    return octaves
# <codecell>
def synthesizeSemigram( beats, semitrums, wavFile, minNote=35.5, binsPerOctave=48 ):
    '''
    Synthesize a semigram using one tone per semitone

    For the center bin of every semitone a 25%-duty square wave is generated
    at the corresponding pitch; within each beat it is scaled by the squared
    semigram value of that bin.  All tones are summed, peak-normalized and
    written to wavFile at 44100 Hz.  The last beat is given a duration of
    0.5 seconds.

    Input:
        beats - beat locations in seconds (not modified by this function)
        semitrums - semigram, size nBeats x nBins
        wavFile - where to write the synthesized semigram
        minNote - MIDI note of the lowest bin of the semigram, default 35.5
        binsPerOctave - number of bins in each octave in the semigram, default 48
    '''
    fs = 44100
    # Floor division keeps these usable as array indices on Python 3
    binsPerSemi = binsPerOctave//12
    firstBin = binsPerSemi//2
    # Center bin of every semitone
    centerBins = np.arange( firstBin, semitrums.shape[1], binsPerSemi )
    # Explicit float division: integer division here would drop the
    # half-semitone offset of the center bin and detune every tone
    notes = minNote + centerBins/float( binsPerSemi )
    frequencies = librosa.feature.midi_to_hz( notes )
    # Work on a shifted copy so the caller's array is left untouched
    beats = np.asarray( beats, dtype=float )
    beats = beats - beats[0]
    beats = np.append( beats, beats[-1] + .5 )
    N = int( fs*beats[-1] )
    # Beat boundaries in samples; slice bounds have to be integers
    boundaries = (beats*fs).astype( int )
    samples = np.arange( N )
    output = np.zeros( N )
    for centerBin, freq in zip( centerBins, frequencies ):
        tone = scipy.signal.square( freq*2.0*np.pi*samples/fs, .25 )
        for m, (start, end) in enumerate( zip( boundaries[:-1], boundaries[1:] ) ):
            tone[start:end] *= semitrums[m, centerBin]**2
        output += tone
    # Peak-normalize, unless the result is pure silence
    peak = np.max( np.abs( output ) )
    if peak > 0:
        output /= peak
    librosa.output.write_wav( wavFile, output, fs )
# <codecell>
if __name__ == '__main__':
    import os
    import glob
    from joblib import Parallel, delayed

    def processSong( mp3File ):
        '''
        Compute beats and semitrums for one mp3 and save them as .npy files
        next to it (same name, extension replaced by a suffix)
        '''
        beatTimes, beatSemitrums = beatChromaLuma( mp3File )
        stem, _ = os.path.splitext( mp3File )
        np.save( stem + '-beats.npy', beatTimes )
        np.save( stem + '-CL-magnitude.npy', beatSemitrums )

    n_jobs = 6
    # Process the datasets one after the other, each in parallel over its files
    for pattern in ( 'data/beatles/*.mp3', 'data/uspop2002/*.mp3' ):
        mp3Files = glob.glob( pattern )
        Parallel(n_jobs=n_jobs)(delayed(processSong)(mp3File) for mp3File in mp3Files)