From e5dc476a0ec542e2de8fe4123edf40f9e57da2a8 Mon Sep 17 00:00:00 2001
From: Jona te Lintelo
Date: Wed, 22 Mar 2023 23:30:01 +0100
Subject: [PATCH] Update prototype.py

---
Notes:
    Replaces the ResNet import and constructor call with ResNext.
    The module is still assigned to self.resnet in __init__, so
    compute_embedding must call self.resnet(...); this patch assigns no
    self.resnext attribute. Context-line whitespace was reconstructed
    from a collapsed copy of this patch -- verify against the target
    file before applying.

 skeleton/models/prototype.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/skeleton/models/prototype.py b/skeleton/models/prototype.py
index 65e0341..8dcc348 100644
--- a/skeleton/models/prototype.py
+++ b/skeleton/models/prototype.py
@@ -24,7 +24,7 @@
     EvaluationPair,
     evaluate_speaker_trials,
 )
-from skeleton.layers.resnet import ResNet
+from skeleton.layers.resnext import ResNext
 from skeleton.layers.statistical_pooling import MeanStatPool1D
 
 
@@ -69,7 +69,7 @@ def __init__(
             nn.ReLU(),
         )
 
-        self.resnet = ResNet(((num_embedding, 2, num_embedding*2),(num_embedding*2, 2, num_embedding*4), (num_embedding*4, 2, num_embedding*8), (num_embedding*8, 2, num_embedding*16)))
+        self.resnet = ResNext(((num_embedding, 2, num_embedding*2),(num_embedding*2, 2, num_embedding*4), (num_embedding*4, 2, num_embedding*8), (num_embedding*8, 2, num_embedding*16)))
 
         # Pooling layer
         # assuming input of shape [BATCH_SIZE, NUM_EMBEDDING, REDUCED_NUM_FRAMES]
@@ -111,12 +111,12 @@ def forward(self, spectrogram: t.Tensor) -> Tuple[t.Tensor, t.Tensor]:
     def compute_embedding(self, spectrogram: t.Tensor) -> t.Tensor:
         # modify to your liking!
         feature_representation = self.embedding_layer(spectrogram) # -> [128,128,239]
-        resnet_output = self.resnet(feature_representation)
+        output = self.resnet(feature_representation)
 
-        resnet_output = resnet_output[:, :, None] # -> ([128, 128, 1])
+        output = output[:, :, None] # -> ([128, 128, 1])
 
 
-        embedding = self.pooling_layer(resnet_output) # -> [128, 128]
+        embedding = self.pooling_layer(output) # -> [128, 128]
         return embedding
 
     def compute_prediction(self, embedding: t.Tensor) -> t.Tensor: