diff --git a/deepsphinx/data.py b/deepsphinx/data.py index e31f111..6bf8e33 100644 --- a/deepsphinx/data.py +++ b/deepsphinx/data.py @@ -95,11 +95,11 @@ def read_data_thread( trans = tf.gfile.FastGFile(FLAGS.trans_file).readlines() random.shuffle(trans) for text, set_id_trans, speaker, audio_file in csv.reader(trans): - try: - text = [VOCAB_TO_INT[c] - for c in list(text)] + [VOCAB_TO_INT['']] - except KeyError: - continue + text = [VOCAB_TO_INT[c] for c in list(text)] + # A space is required after the sentence due to the way FST is set up + if (text[-1] != VOCAB_TO_INT[' ']): + text.append(VOCAB_TO_INT[' ']) + text.append(VOCAB_TO_INT['']) if (set_id == set_id_trans and ((not FLAGS.use_train_lm) or in_fst(fst, text))): feat = get_features(audio_file) diff --git a/setup.py b/setup.py deleted file mode 100644 index 8281622..0000000 --- a/setup.py +++ /dev/null @@ -1,17 +0,0 @@ -from setuptools import setup - -setup( - name='deepshinx', - version='0.1', - packages=['deepsphinx'], - scripts=['bin/deepsphinx-train', - 'bin/deepsphinx-infer'], - description='Trainer', - # Replace with tensorflow-gpu if using GPU - install_requires=['python_speech_features', - 'tensorflow==1.3.0', - 'scipy', - 'pysoundfile'], - extras_require = { - 'openfst': ['openfst'], -},)