-
Notifications
You must be signed in to change notification settings - Fork 3
/
ExtractAudioEmbeddings.py
90 lines (78 loc) · 3.4 KB
/
ExtractAudioEmbeddings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# -*- coding: utf-8 -*-
"""ExtractAudioEmbeddings
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1sE2UL7oaAY_I_c8GTmUszlK-o4Piqkss
"""
!pip install librosa
!pip install pydub
import librosa
import numpy as np
import glob, os
from os import path
from pydub import AudioSegment
import pickle as pkl
from google.colab import drive
# Mount Google Drive at /content/drive; every dataset path used below lives under this mount point.
drive.mount('/content/drive')
# Convert every mp3 clip in the TED audio folder to a wav file.
mp3_dir = "/content/drive/Shareddrives/NLP_group/Project/Datasetv3/Ted-Audio"  # folder containing the mp3 audio files
wav_dir = "/content/drive/Shareddrives/NLP_group/Project/Datasetv3/Ted-Audio/wav/"  # destination folder for the wav files

# Create the destination folder up front; exporting into a missing folder would fail.
os.makedirs(wav_dir, exist_ok=True)

# Sorted so the clips are processed (and logged) in a reproducible order.
for mp3_file in sorted(glob.glob(os.path.join(mp3_dir, '*.mp3'))):
    print(mp3_file)
    # splitext strips only the final extension, so a name that itself contains
    # dots is kept intact instead of being cut at the first dot.
    stem = os.path.splitext(os.path.basename(mp3_file))[0]
    wav_file = os.path.join(wav_dir, stem + '.wav')
    print(wav_file)
    AudioSegment.from_mp3(mp3_file).export(wav_file, format="wav")
# Extract a fixed-size audio feature matrix from every wav clip and save it as a text file.
wav_dir = '/content/drive/Shareddrives/NLP_group/Project/Datasetv3/MutedAudio'  # folder containing the laughter-muted wav files
# wav_dir = '/content/drive/Shareddrives/NLP_group/Project/Datasetv3/Ted-Audio/wav'  # folder containing the non-funny wav files
embed_dir = '/content/drive/Shareddrives/NLP_group/Project/Datasetv3/audioembed/'  # destination folder for the audio embeddings
# embed_dir = '/content/drive/Shareddrives/NLP_group/Project/Datasetv3/GloveEmbed'
max_frames = 8000  # every embedding is zero-padded (or cut) to this many time frames

# Create the destination folder up front so the first save cannot fail on a missing directory.
os.makedirs(embed_dir, exist_ok=True)

counter = 0  # number of clips processed so far
wav_files = sorted(glob.glob(os.path.join(wav_dir, '*.wav')))
for wav_file in wav_files:
    print(wav_file)
    x, sr = librosa.load(wav_file)  # decode the audio samples and sampling rate

    # The signal is passed as the keyword `y`: librosa >= 0.10 no longer
    # accepts it positionally, and the keyword form also works on older releases.
    rms = librosa.feature.rms(y=x)  # RMS energy, 1 feature
    mfccs = librosa.feature.mfcc(y=x, sr=sr)  # MFCCs, 20 features
    chromagram = librosa.feature.chroma_stft(y=x, sr=sr)  # chromagram, 12 features
    audiofeatures = np.concatenate((rms, mfccs, chromagram), axis=0)  # 33 features per frame

    n_features, n_frames = audiofeatures.shape
    if n_frames > max_frames:
        # A longer clip used to crash np.zeros with a negative dimension;
        # cut it to the fixed length instead and report which clip was affected.
        print('truncating', wav_file, 'from', n_frames, 'to', max_frames, 'frames')
        audiofeatures = audiofeatures[:, :max_frames]
        n_frames = max_frames

    # Zero-pad along the time axis up to max_frames.
    padding = np.zeros((n_features, max_frames - n_frames))
    audioembeddings = np.concatenate((audiofeatures, padding), axis=1)

    # splitext strips only the final extension, keeping names that contain dots intact.
    stem = os.path.splitext(os.path.basename(wav_file))[0]
    # The context manager closes the file even if savetxt raises.
    with open(os.path.join(embed_dir, stem + 'audioembed.txt'), 'w') as out_file:
        np.savetxt(out_file, audioembeddings)  # write the feature embedding as text

    counter += 1
    print(counter)

print(len(wav_files))
# Sanity check: read one saved embedding back from Drive and print its dimensions.
sample_embedding = np.loadtxt(
    '/content/drive/Shareddrives/NLP_group/Project/Datasetv3/audioembed/AJ_TP_audio_01_mutedaudioembed.txt'
)
print(sample_embedding.shape)
# Gather every per-clip embedding text file into one list and store it as a single array file.
# NOTE(review): this folder is relative to the working directory and differs from the
# Datasetv3/audioembed/ folder the embeddings were written to earlier -- confirm it is intended.
path2 = 'drive/Shareddrives/NLP_group/Project/Dataset/Audio_embeddings/'  # folder containing audio embeddings
filenames = sorted(glob.glob(os.path.join(path2, '*audioembed.txt')))

audio_embeddings = []
counter = 0  # stays 0 when no embedding files are found
for counter, embed_file in enumerate(filenames, start=1):
    print(embed_file)
    # Each loaded matrix is stored as a list of its rows.
    audio_embeddings.append(list(np.loadtxt(embed_file)))
    print(counter)

print(np.shape(audio_embeddings))
# np.save appends the ".npy" extension, so this writes "<path2>audioembed.npy" (not a pickle file).
np.save(path2 + "audioembed", audio_embeddings)