Skip to content

Commit

Permalink
expands the training set
Browse files Browse the repository at this point in the history
  • Loading branch information
Lam Nguyen Tùng Lam committed Mar 15, 2023
1 parent 3a0a9e3 commit ff28d6f
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 45 deletions.
1 change: 1 addition & 0 deletions cli_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def main(
max_epochs=epochs,
accelerator="gpu" if use_gpu else "cpu",
devices=1,
auto_lr_find=True,
callbacks=[checkpointer, LearningRateMonitor()],
logger=[tensorboard_logger, csv_logger],
default_root_dir="logs",
Expand Down
6 changes: 3 additions & 3 deletions experiments/experiment_1_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ val_trials_path=./data/tiny-voxceleb/val_trials.txt
dev_trials_path=./data/tiny-voxceleb/dev_trials.txt

# hyperparameters for optimization
batch_size=128
learning_rate=3e-3
num_epochs=30
batch_size=64
learning_rate=2e-3
num_epochs=40
num_workers=5

# hyperparameters related to data pre-processing and network architecture
Expand Down
98 changes: 61 additions & 37 deletions skeleton/data/datapipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,36 @@ def random_gain_aug(data, minimum=0.1, maximum=0.12): #change the perceived loud
return data * gain #scale but in amplitude

def randomize_effect():
    """Pick which augmentation, if any, to apply to one audio sample.

    Returns:
        A 1-element numpy array holding one of ``'inject_noise'``,
        ``'rd_speed_change'`` or ``'none'``.
    """
    effects = ['inject_noise', 'rd_speed_change', 'none']
    # Augmenting every sample would make the training data unrepresentative
    # of the test data, so 'none' is drawn 90% of the time.
    choice = np.random.choice(effects, 1, p=[0.05, 0.05, 0.9])
    return choice


def decode_wav(value: StreamWrapper) -> t.Tensor:
    """Load a wav stream and apply the augmentation drawn for this sample.

    Args:
        value: stream wrapping the wav file, as produced by the tar loader.

    Returns:
        The loaded (and possibly augmented) audio after ``torch.squeeze``.

    Raises:
        AssertionError: if ``value`` is not a ``StreamWrapper`` or the audio
            is not sampled at 16 kHz.
    """
    assert isinstance(value, StreamWrapper)

    value, sample_rate = torchaudio.load(value)

    # Validate the sample rate before it is handed to an augmentation.
    assert sample_rate == 16_000

    # randomize_effect() returns a 1-element array; comparing it with a
    # string yields a 1-element boolean array, which `if` accepts.
    choice = randomize_effect()
    if choice == 'inject_noise':
        value = inject_noise(value, 0.01)
    elif choice == 'rd_speed_change':
        value = random_speed_change(value, sample_rate)
    # Disabled effects (randomize_effect() does not draw them):
    # elif choice == 'rand_gain':
    #     value = random_gain_aug(value, minimum=0.1, maximum=0.12)
    # elif choice == 'reverb':
    #     value = reverb_aug(value, sample_rate)

    # Drop singleton dimensions so single-channel audio becomes 1-dimensional.
    value = torch.squeeze(value)

    return value

def decode_wav_original(value: StreamWrapper) -> t.Tensor:
assert isinstance(value, StreamWrapper)

value, sample_rate = torchaudio.load(value)

assert sample_rate == 16_000

Expand Down Expand Up @@ -120,6 +136,19 @@ def decode(element: Tuple[str, StreamWrapper]):

return key, value

def decode_original(element: Tuple[str, StreamWrapper]):
    """Decode one ``(key, stream)`` tar entry without augmentation.

    ``.wav`` entries go through ``decode_wav_original`` and ``.json`` entries
    through ``decode_json``; any other entry is returned with its stream
    untouched.

    Args:
        element: 2-tuple of the entry name and its ``StreamWrapper``.

    Returns:
        Tuple of the unchanged key and the decoded (or original) value.
    """
    assert isinstance(element, tuple) and len(element) == 2
    name, payload = element
    assert isinstance(name, str)
    assert isinstance(payload, StreamWrapper)

    if name.endswith(".wav"):
        return name, decode_wav_original(payload)
    if name.endswith(".json"):
        return name, decode_json(payload)
    return name, payload

########################################################################################
# default pipeline loading data from tar files into a tuple (sample_id, x, y)
Expand All @@ -128,6 +157,7 @@ def decode(element: Tuple[str, StreamWrapper]):


def construct_sample_datapipe(
is_augmented: bool,
shard_folder: pathlib.Path,
num_workers: int,
buffer_size: int = 0,
Expand Down Expand Up @@ -163,20 +193,37 @@ def construct_sample_datapipe(
dp = TarArchiveLoader(dp, mode="r")

# decode each file in the tar to the expected python dataformat
dp = Mapper(dp, decode)
if is_augmented:
dp = Mapper(dp, decode)
else:
dp = Mapper(dp, decode_original)

# each file in the tar is expected to have the format `{key}.{ext}`
# this groups all files with the same key into one dictionary
dp = WebDataset(dp)

# transform the dictionaries into tuple (sample_id, x, y)
dp = Mapper(dp, map_dict_to_tuple)
if is_augmented:
dp = Mapper(dp, map_dict_to_tuple)
else:
dp = Mapper(dp, map_dict_to_tuple_original)


# buffer tuples to increase variability
if buffer_size > 0:
dp = Shuffler(dp, buffer_size=buffer_size)
return dp

def map_dict_to_tuple_original(x: Dict) -> Sample:
    """Build a ``Sample`` from a grouped dict, tagging the id with ``_org``.

    Args:
        x: dict with a ``".json"`` entry (providing ``sample_id`` and
            ``class_idx``) and a ``".wav"`` entry.

    Returns:
        ``Sample(sample_id + "_org", wav, gt)`` where ``gt`` is ``None`` when
        ``class_idx`` is ``None`` and an int64 tensor otherwise.
    """
    meta = x[".json"]
    tagged_id = meta["sample_id"] + "_org"
    audio = x[".wav"]

    label = meta["class_idx"]
    gt = None if label is None else t.tensor(label, dtype=t.int64)
    return Sample(tagged_id, audio, gt)

def map_dict_to_tuple(x: Dict) -> Sample:
sample_id = x[".json"]["sample_id"]
Expand All @@ -187,7 +234,7 @@ def map_dict_to_tuple(x: Dict) -> Sample:
gt = None
else:
gt = t.tensor(x[".json"]["class_idx"], dtype=t.int64)

# print("Augmented: ", Sample(sample_id, wav, gt))
return Sample(sample_id, wav, gt)


Expand Down Expand Up @@ -268,25 +315,6 @@ def _print_sample(dp):
print(y)
print(f"{y.shape=}")
print(f"{y.dtype=}\n")
break

def debug_an():
    """Print one sample from the train shards, before and after chunking.

    Uses a hard-coded, machine-specific shard path; intended for manual
    debugging only.
    """
    shard_path = pathlib.Path(
        "/home/anilsson/mlip/tiny-voxceleb-skeleton-2023/data/tiny-voxceleb-shards/train"
    )

    print("### construct_sample_datapipe ###")
    # construct_sample_datapipe takes `is_augmented` as its first parameter;
    # True selects the augmenting decoder.
    dp = construct_sample_datapipe(True, shard_path, num_workers=0)
    _print_sample(dp)

    print("### pipe_chunk_sample ###")
    dp = pipe_chunk_sample(dp, 16_000 * 3)  # 3 seconds at 16 kHz
    _print_sample(dp)



def _debug():
shard_path = pathlib.Path(
Expand All @@ -296,12 +324,13 @@ def _debug():
n_mfcc = 40

print("### construct_sample_datapipe ###")
dp = construct_sample_datapipe(shard_path, num_workers=0)
dp = construct_sample_datapipe(True, shard_path, num_workers=0)
dp_org = construct_sample_datapipe(False, shard_path, num_workers=0)
dp = dp_org.concat(dp)
_print_sample(dp)

print("### pipe_chunk_sample ###")
dp = pipe_chunk_sample(dp, 16_000 * 3) # 3 seconds

_print_sample(dp)


Expand All @@ -315,10 +344,5 @@ def _debug():
_print_sample(dp)






if __name__ == "__main__":
#_debug()
debug_an()
_debug()
23 changes: 19 additions & 4 deletions skeleton/data/tiny_voxceleb.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,38 @@ def __init__(
self.shard_folder = shard_folder
self.val_trials_path = val_trials_path
self.dev_trials_path = dev_trials_path
self.is_augmented = True

# init in setup()
self.train_dp_original = None
self.train_dp = None
self.val_dp = None
self.dev_dp = None

def setup(self, stage: Optional[str] = None) -> None:
# train dataloader
train_dp = construct_sample_datapipe(
# train dataloader (non-augmented)
train_dp_original = construct_sample_datapipe(not self.is_augmented,
self.shard_folder / "train", num_workers=self.num_workers_train
)
train_dp_original = pipe_chunk_sample(train_dp_original, self.chunk_length_num_frames)
train_dp_original = pipe_mfcc(train_dp_original, self.n_mfcc)
train_dp_original = pipe_batch_samples(train_dp_original, self.batch_size, drop_last=True)
self.train_dp_original = train_dp_original

# train dataloader (augmented)
train_dp = construct_sample_datapipe(self.is_augmented,
self.shard_folder / "train", num_workers=self.num_workers_train
)
# train_dp = train_dp_original.concat(train_dp)
train_dp = pipe_chunk_sample(train_dp, self.chunk_length_num_frames)
train_dp = pipe_mfcc(train_dp, self.n_mfcc)
train_dp = pipe_batch_samples(train_dp, self.batch_size, drop_last=True)
self.train_dp = train_dp

# self.train_dp = self.train_dp_original.concat(self.train_dp)

# val dataloader
val_dp = construct_sample_datapipe(
val_dp = construct_sample_datapipe(not self.is_augmented,
self.shard_folder / "val", num_workers=self.num_workers_eval
)
val_dp = pipe_chunk_sample(val_dp, self.chunk_length_num_frames)
Expand All @@ -73,14 +87,15 @@ def setup(self, stage: Optional[str] = None) -> None:

# dev dataloader
# we explicitly evaluate with a batch size of 1 and the whole utterance
dev_dp = construct_sample_datapipe(
dev_dp = construct_sample_datapipe(not self.is_augmented,
self.shard_folder / "dev", num_workers=self.num_workers_eval
)
dev_dp = pipe_mfcc(dev_dp, self.n_mfcc)
dev_dp = pipe_batch_samples(dev_dp, batch_size=1, drop_last=False)
self.dev_dp = dev_dp

def train_dataloader(self) -> TRAIN_DATALOADERS:
    """Return a loader over the original pipe followed by the augmented one.

    ``batch_size=None`` disables the DataLoader's own batching; the pipes
    built in ``setup()`` already go through ``pipe_batch_samples``.
    """
    # Wrap only once: re-wrapping on every call would nest the chain and
    # iterate the original data again for each earlier call.
    if not isinstance(self.train_dp, torch.utils.data.ChainDataset):
        self.train_dp = torch.utils.data.ChainDataset(
            [self.train_dp_original, self.train_dp]
        )
    return torch.utils.data.DataLoader(
        self.train_dp, batch_size=None, num_workers=self.num_workers_train
    )
Expand Down
2 changes: 1 addition & 1 deletion skeleton/layers/resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def block(self, in_channels,num_residuals, out_channels):
if i == 0:
blk.append(ResidualBlock(in_channels, out_channels, 16 , 1, use_1x1conv=True))
else:
blk.append(ResidualBlock(in_channels * 2, out_channels, 16, 1, use_1x1conv=True))
blk.append(ResidualBlock(in_channels * 2, out_channels, 16, 1))
return nn.Sequential(*blk)

def forward(self, x):
Expand Down

0 comments on commit ff28d6f

Please sign in to comment.