Skip to content

Commit

Permalink
Kaldi decoder running on ptics with en LM
Browse files Browse the repository at this point in the history
Run alex/applications/PublicTransportInfoCS/vhub_test_kaldi
  • Loading branch information
oplatek committed Dec 21, 2013
1 parent 49c98cf commit 5665d1b
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 8 deletions.
22 changes: 22 additions & 0 deletions alex/applications/PublicTransportInfoCS/kaldi.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python
# vim: set fileencoding=UTF-8 filetype=python :
#
# When the configuration file is loaded, several automatic transformations
# are applied:
#
# 1) '{cfg_abs_path}' as a substring of atomic attributes is replaced by
# an absolute path of the configuration files. This can be used to
# make the configuration file independent of the location of programs
# using the configuration file.
#
# or, better, use the as_project_path function

# Overrides layered on top of the base configuration to switch speech
# recognition to the Kaldi decoder with debugging enabled.
config = {
    'ASR': {
        'debug': True,
        # Presumably selects which ASR backend section below is used -- confirm
        # against the component that reads cfg['ASR']['type'].
        'type': 'Kaldi',
        'Kaldi': {
            # Read by KaldiASR as cfg['ASR']['Kaldi']['debug']; when true, the
            # lattice of every hypothesis is written to the session directory.
            'debug': True,
        },
    },
}
5 changes: 5 additions & 0 deletions alex/applications/PublicTransportInfoCS/vhub_test_kaldi
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Start vhub.py with the PTICS configuration followed by the Kaldi ASR
# configuration (presumably the later file overrides the earlier -- confirm).
#
# NOTE(review): `cd ..` is resolved against the caller's current working
# directory, not this script's location, so the script apparently has to be
# launched from inside the PublicTransportInfoCS directory -- confirm.

# Move up to the directory that contains vhub.py.
cd ..

./vhub.py -c ./PublicTransportInfoCS/ptics.cfg ./PublicTransportInfoCS/kaldi.cfg
34 changes: 26 additions & 8 deletions alex/components/asr/kaldi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# FIXME: PYTHONPATH could be adjusted here instead, e.g. via sys.path.insert(0, ...)
raise KaldiSetupException('%s\nTry setting PYTHONPATH or LD_LIBRARY_PATH' % e.message)
import time
from datetime import datetime
import os


class KaldiASR(object):
Expand All @@ -25,6 +27,8 @@ def __init__(self, cfg):
self.logger = cfg['Logging']['system_logger']
self.cfg = cfg
kcfg = cfg['ASR']['Kaldi']

self.debug = kcfg['debug']
self.wst = wst2dict(kcfg['wst'])
self.max_dec_frames = kcfg['max_dec_frames']
# specify all other options in config
Expand All @@ -37,7 +41,6 @@ def __init__(self, cfg):

self.decoder = PyGmmLatgenWrapper()
self.decoder.setup(argv)
self.decoder.reset(keep_buffer_data=False) # FIXME is it necessary?

def flush(self):
"""
Expand All @@ -56,31 +59,46 @@ def rec_in(self, frame):
:frame: @todo
:returns: self - The instance of KaldiASR
"""
start = time.clock()
frame_total, start = 0, time.clock()
self.decoder.frame_in(frame.payload)
self.logger.info('frame_in of %d frames' % (len(frame.payload) / 2))
self.logger.debug('frame_in of %d frames' % (len(frame.payload) / 2))
dec_t = self.decoder.decode(max_frames=self.max_dec_frames)
while dec_t > 0:
frame_total += dec_t
dec_t = self.decoder.decode(max_frames=self.max_dec_frames)
self.logger.info('Forward decoding of %d frames in %s secs' % (
dec_t, str(time.clock() - start)))
if (frame_total > 0):
self.logger.debug('Forward decoding of %d frames in %s secs' % (
frame_total, str(time.clock() - start)))
return self

def hyp_out(self):
""" This defines the asynchronous interface for speech recognition.
Returns the recognizer's hypotheses about the input speech audio.
"""
start = time.clock()

# Get hypothesis
self.decoder.prune_final()
lat = self.decoder.get_lattice()
utt_prob, lat = self.decoder.get_lattice()
self.decoder.reset(keep_buffer_data=False)

# Convert lattice to nblist
nbest = lattice_to_nbest(lat, n=5)
nblist = UtteranceNBList()
for w, word_ids in nbest:
words = [self.wst[str(i)] for i in word_ids]
words = ' '.join([self.wst[str(i)] for i in word_ids])
nblist.add(w, Utterance(words))
self.logger.info('hyp_out: get_lattice+nbest in %s secs' % str(time.clock() - start))

# Log
if len(nbest) == 0:
self.logger.warning('hyp_out: empty hypothesis')
nblist.add(1.0, Utterance('Empty hypothesis: DEBUG'))
if self.debug:
output_file_name = os.path.join(
self.logger.get_session_dir_name(),
'%s.fst' % str(datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')))
lat.write(output_file_name)
self.logger.info('utterance "probability" is %f' % utt_prob)
self.logger.debug('hyp_out: get_lattice+nbest in %s secs' % str(time.clock() - start))

return nblist
1 change: 1 addition & 0 deletions alex/resources/default.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ config = {
},
},
'Kaldi': {
'debug': False,
'wst': os.path.abspath('../resources/asr/kaldi/words.txt'),
'config': os.path.abspath('../resources/asr/kaldi/decode.conf'),
'verbose': 0,
Expand Down

0 comments on commit 5665d1b

Please sign in to comment.