forked from aalto-speech/speaker-diarization
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspk-diarization2.py
executable file
·138 lines (117 loc) · 5.67 KB
/
spk-diarization2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python2
import argparse
import sys
from os import getcwd
import os.path as op
from tempfile import mkstemp, gettempdir
from subprocess import Popen, call
from mimetypes import guess_type
import numpy as np
# MIME types that mimetypes.guess_type() may report for a .wav file; which one
# is returned depends on the platform's MIME tables.
_WAV_MIMETYPES = ('audio/x-wav', 'audio/wav', 'audio/vnd.wave')


def _fail(message):
    """Print *message* on stdout and terminate with exit status 1."""
    print(message)
    sys.exit(1)


def _temp_recipe(prefix, tmpdir):
    """Create an empty temporary ``.recipe`` file in *tmpdir*; return its path.

    mkstemp() returns an already-open OS-level descriptor together with the
    path.  Only the path is needed by the callers, so the descriptor is closed
    here instead of being leaked.
    """
    import os  # local import: the file only imports names *from* os

    fd, path = mkstemp(suffix='.recipe', prefix=prefix, dir=tmpdir)
    os.close(fd)
    return path


def _build_parser():
    """Return the argparse parser describing the command line of the script."""
    parser = argparse.ArgumentParser(
        description='Process a media file to perform '
                    'segmentation and speaker clustering on it.')
    parser.add_argument('infile', type=str,
                        help='Specifies the media file')
    parser.add_argument('-o', dest='outfile', type=str, default='stdout',
                        help='Specifies an output recipe file, default stdout.')
    # NOTE: there is no output-format option; when writing to a file, the
    # .ann and .eaf variants are always generated next to the aku recipe.
    parser.add_argument('-fc', dest='fcpath', type=str, default=getcwd(),
                        help='Specifies the path to feacat, defaults to ./')
    parser.add_argument('-fcfg', dest='fcfg', type=str,
                        default=getcwd() + '/fconfig.cfg',
                        help='Specifies the feacat acoustic model config, defaults ./fconfig.cfg')
    parser.add_argument('-lna', dest='lnapath', type=str,
                        default=getcwd() + '/lna',
                        help='Specifies the path to the lna files, defaults to ./lna')
    parser.add_argument('-exp', dest='exppath', type=str,
                        default=getcwd() + '/exp',
                        help='Specifies the path to the exp files, defaults to ./exp')
    parser.add_argument('-fp', dest='feapath', type=str,
                        default=getcwd() + '/fea',
                        help='Specifies the path to the feature files, defaults to ./fea')
    parser.add_argument('-tmp', dest='tmppath', type=str, default='',
                        help='Specifies where to write the temporal files, defaults to system temporary folder.')
    return parser


def main(argv=None):
    """Run the diarization pipeline on one media file.

    The steps are: validate the command-line paths, convert the input to a
    .wav file with ffmpeg when it is not one already, then call the helper
    scripts (generate_exp.py, voice-detection2.py, spk-change-detection.py,
    spk-clustering.py and, when writing to a file, aku2ann.py / aku2elan.py)
    together with the feacat binary.

    The helper scripts are invoked through ``./`` relative paths, so the
    script has to be started from the directory that contains them.

    Args:
        argv: list of command-line arguments; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: with status 1 when a required file or directory is
            missing (argparse raises it as well on malformed arguments).
    """
    args = _build_parser().parse_args(argv)

    # Process arguments
    if not op.isfile(args.infile):
        _fail('%s does not exist, exiting' % args.infile)
    print('Reading file: %s' % args.infile)

    outfile = args.outfile
    print('Writing output to: %s' % outfile)

    fcpath = op.join(args.fcpath, 'feacat')
    if not op.isfile(fcpath):
        _fail('%s does not exist, exiting' % fcpath)
    print('Using feacat from: %s' % fcpath)

    # An unset or non-existent -tmp falls back to the system temporary folder.
    tmppath = args.tmppath if op.isdir(args.tmppath) else gettempdir()
    print('Writing temporal files in: %s' % tmppath)

    # Every working directory must already exist; each one is checked against
    # its own path (the feature directory used to be skipped by mistake).
    for path, label in ((args.lnapath, 'lna files'),
                        (args.exppath, 'exp files'),
                        (args.feapath, 'features')):
        if not op.isdir(path):
            _fail('Path %s does not exist, exiting' % path)
        print('Writing %s in: %s' % (label, path))
    # End of argument processing

    # Checking if media file is .wav audio
    if guess_type(args.infile)[0] in _WAV_MIMETYPES:
        infile = args.infile
    else:
        print('Media is not a .wav audio file, attempting to extract a .wav file')
        print('Calling ffmpeg')
        infile = op.splitext(args.infile)[0] + '.wav'
        call(['ffmpeg', '-i', args.infile, '-ar', '16000', '-ac', '1',
              '-ab', '32k', infile])
        # The exit status is deliberately not inspected: what matters for the
        # rest of the pipeline is that a .wav file is available.
        if not op.isfile(infile):
            _fail('%s does not exist, exiting' % infile)

    # Prepare an initial temporal recipe
    init_recipe = _temp_recipe('init', tmppath)
    with open(init_recipe, 'w') as init_file:
        init_file.write('audio=' + infile + '\n')

    print('Performing exp generation and feacat concurrently')
    child1 = Popen(['./generate_exp.py', init_recipe, '-e', args.exppath,
                    '-l', args.lnapath])
    fea_name = op.splitext(op.basename(infile))[0] + '.fea'
    # The child process receives its own copy of the descriptor, so the
    # parent's handle can be closed as soon as Popen() returns.
    with open(op.join(args.feapath, fea_name), 'w') as feafile:
        child2 = Popen([fcpath, '-c', args.fcfg, '-H', '--raw-output',
                        infile], stdout=feafile)

    # We need the exp files ready here
    child1.wait()

    print('Calling voice-detection2.py')
    vad_recipe = _temp_recipe('vad', tmppath)
    call(['./voice-detection2.py', init_recipe, args.exppath,
          '-o', vad_recipe, '-ms', '0.5', '-mns', '1.5'])

    # We need to wait for the features to be ready here
    print('Waiting for feacat to end.')
    child2.wait()

    spkchange_recipe = _temp_recipe('spkc', tmppath)
    print('Calling spk-change-detection.py')
    call(['./spk-change-detection.py', vad_recipe, args.feapath,
          '-o', spkchange_recipe, '-m', 'gw', '-d', 'BIC', '-w', '1.0',
          '-st', '3.0', '-dws', '0.1', '-l', '1.0'])

    print('Calling spk-clustering.py')
    call(['./spk-clustering.py', spkchange_recipe, args.feapath,
          '-o', outfile, '-m', 'hi', '-l', '1.3'])

    # Outputting alternative formats
    if outfile != 'stdout':
        outf = op.splitext(op.basename(outfile))[0]
        outfpath = op.dirname(outfile)
        print('Calling aku2ann.py')
        call(['./aku2ann.py', outfile, '-o', op.join(outfpath, outf + '.ann')])
        print('Calling aku2elan.py')
        call(['./aku2elan.py', outfile, '-o', op.join(outfpath, outf + '.eaf')])


if __name__ == '__main__':
    main()