diff --git a/convert.py b/convert.py
index f57363d..29a3aa3 100644
--- a/convert.py
+++ b/convert.py
@@ -35,29 +35,34 @@ def main():
     model.to(device)
     print('done')
 
-    train_filelist, val_filelist = dataset.train_val_split(
+    cache_dir = 'sr{}_hl{}_nf{}'.format(args.sr, args.hop_length, args.n_fft)
+    filelist = dataset.raw_data_split(
         dataset_dir=args.dataset,
-        split_mode=args.split_mode,
-        val_rate=0,
-        vocals=False
+        split_mode=args.split_mode
     )
-    filelist = train_filelist + val_filelist
 
-    cache_dir = 'sr{}_hl{}_nf{}'.format(args.sr, args.hop_length, args.n_fft)
-
     for mix_path, inst_path in filelist:
         X_basename = os.path.splitext(os.path.basename(mix_path))[0]
         y_basename = os.path.splitext(os.path.basename(inst_path))[0]
+        pv_basename = X_basename + '_PseudoVocals'
+        pi_basename = X_basename + '_PseudoInstruments'
+
         print('converting {}...'.format(X_basename))
 
-        X_cache_dir = os.path.join(os.path.dirname(mix_path), cache_dir)
-        y_cache_dir = os.path.join(os.path.dirname(inst_path), cache_dir)
-        v_dir = os.path.join(os.path.split(os.path.dirname(inst_path))[0], 'vocals')
-        v_cache_dir = os.path.join(v_dir, cache_dir)
+        X_dir = os.path.dirname(mix_path)
+        y_dir = os.path.dirname(inst_path)
+        pv_dir = os.path.join(os.path.split(y_dir)[0], 'pseudo_vocals')
+        pi_dir = os.path.join(os.path.split(y_dir)[0], 'pseudo_instruments')
+
+        X_cache_dir = os.path.join(X_dir, cache_dir)
+        y_cache_dir = os.path.join(y_dir, cache_dir)
+        pv_cache_dir = os.path.join(pv_dir, cache_dir)
+        pi_cache_dir = os.path.join(pi_dir, cache_dir)
 
         os.makedirs(X_cache_dir, exist_ok=True)
         os.makedirs(y_cache_dir, exist_ok=True)
-        os.makedirs(v_cache_dir, exist_ok=True)
+        os.makedirs(pv_cache_dir, exist_ok=True)
+        os.makedirs(pi_cache_dir, exist_ok=True)
 
         X, sr = librosa.load(
             mix_path, sr=args.sr, mono=False, dtype=np.float32, res_type='kaiser_fast')
@@ -73,19 +78,20 @@ def main():
         y = spec_utils.wave_to_spectrogram(y, args.hop_length, args.n_fft)
 
         sp = inference.Separator(model, device, args.batchsize, args.cropsize)
-        _, v_spec = sp.separate_tta(X - y)
-        # a_spec, _ = sp.separate_tta(X - y)
+        _, pv = sp.separate_tta(X - y)
+        # pa, pv = sp.separate_tta(X - y)
+
+        # pi = y + pa
 
-        # print('inverse stft of pseudo instruments...', end=' ')
-        pseudo_inst = y + a_spec
-        print('done')
+        # wave = spec_utils.spectrogram_to_wave(pv, hop_length=args.hop_length)
+        sf.write('{}/{}.wav'.format(pv_dir, pv_basename), [0], sr)
+        # wave = spec_utils.spectrogram_to_wave(pi, hop_length=args.hop_length)
+        sf.write('{}/{}.wav'.format(pi_dir, pi_basename), [0], sr)
 
         np.save('{}/{}.npy'.format(X_cache_dir, X_basename), X.transpose(2, 0, 1))
         np.save('{}/{}.npy'.format(y_cache_dir, y_basename), y.transpose(2, 0, 1))
-
-        wave = spec_utils.spectrogram_to_wave(v_spec, hop_length=args.hop_length)
-        sf.write('{}/{}_Vocals.wav'.format(v_dir, X_basename), wave.T, sr)
-        np.save('{}/{}_Vocals.npy'.format(v_cache_dir, X_basename), v_spec.transpose(2, 0, 1))
+        np.save('{}/{}.npy'.format(pv_cache_dir, pv_basename), pv.transpose(2, 0, 1))
+        # np.save('{}/{}.npy'.format(pi_cache_dir, pi_basename), pi.transpose(2, 0, 1))
 
 
 if __name__ == '__main__':
diff --git a/lib/dataset.py b/lib/dataset.py
index 2ba7c40..c6120ab 100644
--- a/lib/dataset.py
+++ b/lib/dataset.py
@@ -176,12 +176,12 @@ def make_pair(X_dir, y_dir, v_dir=None):
     return filelist
 
 
-def train_val_split(dataset_dir, split_mode, val_rate, val_filelist=[], vocals=True):
+def train_val_split(dataset_dir, split_mode, val_rate, val_filelist=[]):
     if split_mode == 'random':
         filelist = make_pair(
             os.path.join(dataset_dir, 'mixtures'),
             os.path.join(dataset_dir, 'instruments'),
-            os.path.join(dataset_dir, 'vocals') if vocals else None
+            os.path.join(dataset_dir, 'pseudo_vocals')
         )
 
         random.shuffle(filelist)
@@ -198,22 +198,42 @@
     elif split_mode == 'subdirs':
         if len(val_filelist) != 0:
             raise ValueError('`val_filelist` option is not available with `subdirs` mode')
-        
+
         train_filelist = make_pair(
             os.path.join(dataset_dir, 'training/mixtures'),
             os.path.join(dataset_dir, 'training/instruments'),
-            os.path.join(dataset_dir, 'training/vocals') if vocals else None
+            os.path.join(dataset_dir, 'training/pseudo_vocals')
         )
 
         val_filelist = make_pair(
             os.path.join(dataset_dir, 'validation/mixtures'),
             os.path.join(dataset_dir, 'validation/instruments'),
-            os.path.join(dataset_dir, 'validation/vocals') if vocals else None
+            os.path.join(dataset_dir, 'validation/pseudo_vocals')
        )
 
     return train_filelist, val_filelist
 
 
+def raw_data_split(dataset_dir, split_mode):
+    if split_mode == 'random':
+        filelist = make_pair(
+            os.path.join(dataset_dir, 'mixtures'),
+            os.path.join(dataset_dir, 'instruments'),
+        )
+    elif split_mode == 'subdirs':
+        train_filelist = make_pair(
+            os.path.join(dataset_dir, 'training/mixtures'),
+            os.path.join(dataset_dir, 'training/instruments'),
+        )
+        val_filelist = make_pair(
+            os.path.join(dataset_dir, 'validation/mixtures'),
+            os.path.join(dataset_dir, 'validation/instruments'),
+        )
+        filelist = train_filelist + val_filelist
+
+    return filelist
+
+
 def make_padding(width, cropsize, offset):
     left = offset
     roi_size = cropsize - offset * 2