diff --git a/examples/keyword_spotting/README.md b/examples/keyword_spotting/README.md
index 3b0c2cec..fdef4915 100644
--- a/examples/keyword_spotting/README.md
+++ b/examples/keyword_spotting/README.md
@@ -164,8 +164,33 @@ Total Memory cost (Network and NNoM): 15020
 msh > 
 ~~~
 
+## Testing it on PC
+You can now test your KWS model on a PC, using KWS test data that is preprocessed by the C MFCC implementation. First, download the raw test data from Google Drive [here](https://drive.google.com/drive/folders/1gS2klWb02YvaoE5UTNDy9SQsS5ZTsvNN?usp=sharing). Then, use `main_pc.c` as your main file. It prints each model prediction followed by the ground truth, and also reports the Top-1 accuracy score every 100 samples.
 
-
+~~~
+0 right : 100% - Ground Truth is: right
+1 nine : 100% - Ground Truth is: right
+2 right : 100% - Ground Truth is: right
+3 right : 100% - Ground Truth is: right
+4 right : 100% - Ground Truth is: right
+5 right : 100% - Ground Truth is: right
+6 right : 100% - Ground Truth is: right
+7 right : 100% - Ground Truth is: right
+8 right : 100% - Ground Truth is: right
+9 forward : 66% - Ground Truth is: right
+10 right : 100% - Ground Truth is: right
+11 right : 100% - Ground Truth is: right
+12 right : 100% - Ground Truth is: right
+13 right : 100% - Ground Truth is: right
+14 right : 52% - Ground Truth is: right
+15 right : 100% - Ground Truth is: right
+16 right : 100% - Ground Truth is: right
+17 right : 100% - Ground Truth is: right
+18 right : 100% - Ground Truth is: right
+19 right : 74% - Ground Truth is: right
+20 right : 100% - Ground Truth is: right
+...
+~~~
 
 
 
diff --git a/examples/keyword_spotting/main_pc.c b/examples/keyword_spotting/main_pc.c
new file mode 100644
index 00000000..1db3db50
--- /dev/null
+++ b/examples/keyword_spotting/main_pc.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2018-2020, Jianjia Ma
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Change Logs:
+ * Date           Author       Notes
+ * 2019-03-29     Jianjia Ma   first implementation
+ *
+ * Notes:
+ * This is a keyword spotting example using NNoM
+ *
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "nnom.h"
+#include "kws_weights.h"
+
+#include "mfcc.h"
+#include "math.h"
+
+
+// NNoM model instance; main() assigns it from nnom_model_create()
+nnom_model_t *model;
+
+// alternative label set 1 (disabled): 10 keywords followed by "unknow"
+//const char label_name[][10] =  {"yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go", "unknow"};
+
+// alternative label set 2 (disabled): 10 keywords followed by "unknow"
+//const char label_name[][10] =  {"marvin", "sheila", "yes", "no", "left", "right", "forward", "backward", "stop", "go", "unknow"};
+
+// active label set: the full 34 keywords followed by "unknow"; main() indexes it with the label from nnom_predict()
+const char label_name[][10] =  {"backward", "bed", "bird", "cat", "dog", "down", "eight","five", "follow", "forward",
+                      "four", "go", "happy", "house", "learn", "left", "marvin", "nine", "no", "off", "on", "one", "right",
+                      "seven", "sheila", "six", "stop", "three", "tree", "two", "up", "visual", "yes", "zero", "unknow"};
+
+
+
+int16_t audio[512]; // NOTE(review): not referenced anywhere in this file - confirm it is still needed
+char ground_truth[12000][10]; // labels read from test_y.txt by main(): 12000 rows of 10 bytes each
+#define SAMP_FREQ 16000
+#define AUDIO_FRAME_LEN (512) // 32ms * 16000hz = 512 samples; FFT window size must be a power of 2
+
+mfcc_t * mfcc; // MFCC extractor handle, set by mfcc_create() in thread_kws_serv()
+//int32_t audio_data[4000]; //32000/8
+int dma_audio_buffer[AUDIO_FRAME_LEN]; // 512 raw samples; main() fills it from test_x.txt before each thread_kws_serv() call
+int16_t audio_buffer_16bit[(int)(AUDIO_FRAME_LEN*1.5)]; // an easy method for 50% overlapping
+int audio_sample_i = 0; // samples of the current clip consumed so far; main() resets it after each prediction
+
+//the mfcc feature for kws
+#define MFCC_LEN            (62)    // number of MFCC frames (rows) held in each feature buffer
+#define MFCC_COEFFS_FIRST   (1)     // ignore the mfcc feature before this number
+#define MFCC_COEFFS_LEN     (13)    // the total coefficient to calculate
+#define MFCC_TOTAL_NUM_BANK (26)    // total number of filter bands
+#define MFCC_COEFFS         (MFCC_COEFFS_LEN-MFCC_COEFFS_FIRST) // coefficients kept per frame (13-1 = 12)
+
+#define MFCC_FEAT_SIZE  (MFCC_LEN * MFCC_COEFFS) // number of bytes main() copies into the model input
+float mfcc_features_f[MFCC_COEFFS];             // output of mfcc
+int8_t mfcc_features[MFCC_LEN][MFCC_COEFFS];     // ring buffer
+int8_t mfcc_features_seq[MFCC_LEN][MFCC_COEFFS]; // sequential buffer for neural network input.
+uint32_t mfcc_feat_index = 0; // next row of mfcc_features to write; thread_kws_serv() wraps it to 0 at MFCC_LEN
+
+// msh debugging controls
+bool is_print_abs_mean = false; // to print the mean of absolute value of the mfcc_features_seq[][]
+bool is_print_mfcc  = false;    // to print the raw mfcc features at each update
+void Error_Handler()
+{
+    fputs("error\n", stdout); // report a failure on stdout; nothing else is done
+}
+
+// Mean of the absolute values of p[0..size-1], truncated; returns 0 for an empty buffer.
+static int32_t abs_mean(int8_t *p, size_t size)
+{
+    int64_t sum = 0;
+    if(size == 0)
+        return 0; // guard: dividing by size below would be a division by zero
+    for(size_t i = 0; i < size; i++)
+    {
+        sum += (p[i] < 0) ? -p[i] : p[i]; // int8_t promotes to int, so negating -128 is safe
+    }
+    return (int32_t)(sum / (int64_t)size);
+}
+
+// Quantise `size` floats from din into int8 values in dout, clamping to +/-2^int_bit first.
+void quantize_data(float*din, int8_t *dout, uint32_t size, uint32_t int_bit)
+{
+    #define _MAX(x, y) (((x) > (y)) ? (x) : (y))
+    #define _MIN(x, y) (((x) < (y)) ? (x) : (y))
+    const float full_scale = (1 << int_bit);
+    for(uint32_t idx = 0; idx < size; idx++)
+    {
+        float clamped = _MAX(_MIN(din[idx], full_scale), -full_scale);
+        float q7 = round(clamped / full_scale * 128); // snap to a 1/128 grid
+        dout[idx] = round((q7 / 128.0f) * 127);       // rescale so full scale maps to +/-127
+    }
+}
+
+
+
+// Feeds the chunk currently in dma_audio_buffer through the MFCC front end and
+// appends up to two quantised feature rows to the mfcc_features ring buffer.
+void thread_kws_serv()
+{
+    #define SaturaLH(N, L, H) (((N)<(L))?(L):(((N)>(H))?(H):(N)))
+    int *p_raw_audio;
+
+    // calculate 13 coefficient, use number #2~13 coefficient. discard #1
+    // features, offset, bands, 512fft, 0.97 preempha, attached_energy_to_band0
+    // Create the extractor on the first call only: this function runs once per
+    // chunk, and calling mfcc_create() every time leaked all of its buffers.
+    if (mfcc == NULL)
+        mfcc = mfcc_create(MFCC_COEFFS_LEN, MFCC_COEFFS_FIRST, MFCC_TOTAL_NUM_BANK, AUDIO_FRAME_LEN, 0.97f, true);
+
+    // 15872 = 31*512: the last chunk of a clip carries only 128 fresh samples
+    if (audio_sample_i == 15872)
+        memset(&dma_audio_buffer[128], 0, sizeof(int) * 128); //to fill the latest quarter in the latest frame
+    p_raw_audio = dma_audio_buffer;
+
+    // memory move
+    // audio buffer = | 256 old samples | 256 new samples 1 | 256 new samples 2 |
+    //                         ^------------------------------------------|
+    memcpy(audio_buffer_16bit, &audio_buffer_16bit[AUDIO_FRAME_LEN], (AUDIO_FRAME_LEN/2)*sizeof(int16_t));
+
+    // convert it to 16 bit.
+    for(int i = 0; i < AUDIO_FRAME_LEN; i++)
+    {
+        audio_buffer_16bit[AUDIO_FRAME_LEN/2+i] = p_raw_audio[i];
+    }
+
+    // MFCC
+    // do the first mfcc with half old data(256) and half new data(256)
+    // then do the second mfcc with all new data(512).
+    for(int i=0; i<2; i++)
+    {
+        // skip the first window of a clip's first chunk (its first half holds no samples
+        // of this clip) and the second window of the last chunk (only 128 samples are fresh)
+        if ((audio_sample_i != 0 || i==1) && (audio_sample_i != 15872 || i==0))
+        {
+            mfcc_compute(mfcc, &audio_buffer_16bit[i*AUDIO_FRAME_LEN/2], mfcc_features_f);
+
+            // quantise them using the same scale as training data (in keras), by 2^n.
+            quantize_data(mfcc_features_f, mfcc_features[mfcc_feat_index], MFCC_COEFFS, 3);
+
+            // debug only, to print mfcc data on console
+            if(0)
+            {
+                for(int q=0; q<MFCC_COEFFS; q++)
+                    printf("%d ",  mfcc_features[mfcc_feat_index][q]);
+                printf("\n");
+            }
+
+            mfcc_feat_index++;
+            if(mfcc_feat_index >= MFCC_LEN)
+                mfcc_feat_index = 0;
+        }
+    }
+}
+
+
+
+// Replays the clips in test_x.txt through the MFCC front end and the NNoM model, printing
+// each prediction beside its test_y.txt label plus the running Top-1 accuracy.
+// Returns 0 on success, 1 when either input file cannot be opened.
+int main(void)
+{
+    uint32_t label;
+    float prob;
+    int s = 0;       // number of clips classified so far
+    int correct = 0; // how many of those matched the ground truth
+    int j = 0;       // number of ground-truth labels loaded
+    int F = 512;     // samples to read for the next chunk
+    int p;           // samples read into the current chunk
+    FILE * file;
+    FILE * ground_truth_f;
+
+    audio_sample_i = 0;
+    file = fopen ("test_x.txt","r"); //the audio data stored in a textfile
+    ground_truth_f = fopen ("test_y.txt","r"); //the ground truth textfile
+    if (file == NULL || ground_truth_f == NULL)
+    {
+        fprintf(stderr, "error: cannot open test_x.txt or test_y.txt\n");
+        if (file != NULL) fclose(file);
+        if (ground_truth_f != NULL) fclose(ground_truth_f);
+        return 1;
+    }
+
+    // Loop on the fscanf() result rather than feof(), which only turns true after a
+    // failed read; "%9s" and the row limit keep every write inside ground_truth.
+    while (j < (int)(sizeof(ground_truth)/sizeof(ground_truth[0])) && fscanf (ground_truth_f, "%9s", ground_truth[j]) == 1)
+        j++;
+    fclose (ground_truth_f);
+
+    // create and compile the model
+    model = nnom_model_create();
+
+    // stop after 11000 clips, or earlier when the labels or the audio run out
+    while(s < 11000 && s < j)
+    {
+        for (p = 0; p < F; p++)
+            if (fscanf(file, "%d", &dma_audio_buffer[p]) != 1)
+                break;
+        if (p < F) break; // audio file ended (or is malformed) part-way through a chunk
+
+        thread_kws_serv();
+        audio_sample_i = audio_sample_i + F;
+        // once 31*512 samples are consumed, only 128 samples of the 16000-sample clip remain
+        F = (audio_sample_i == 15872) ? 128 : 512;
+
+        if(audio_sample_i>=16000)
+        {
+            // ML
+            memcpy(nnom_input_data, mfcc_features, MFCC_FEAT_SIZE);
+            nnom_predict(model, &label, &prob);
+
+            // output
+            printf("%d %s : %d%% - Ground Truth is: %s\n", s, label_name[label], (int)(prob * 100),ground_truth[s]);
+            if(strcmp(ground_truth[s], label_name[label])==0) correct++;
+
+            // count this clip first so that correct/s is a true ratio (it could exceed 100% before)
+            s=s+1;
+            if(s%100==0)
+                printf("Accuracy : %.6f%%\n", ((float)correct/(s) * 100));
+            audio_sample_i = 0;
+            F = 512;
+        }
+    }
+    if (s > 0) // nothing to report (and nothing to divide by) when no clip was classified
+        printf("Accuracy : %.6f%%\n", ((float)correct/(s) * 100));
+    fclose(file);
+    return 0;
+}
diff --git a/examples/keyword_spotting/mfcc.c b/examples/keyword_spotting/mfcc.c
index efee1a59..6e16aea4 100644
--- a/examples/keyword_spotting/mfcc.c
+++ b/examples/keyword_spotting/mfcc.c
@@ -34,7 +34,7 @@
 
 #ifndef MFCC_PLATFORM_ARM
 // FFT code from arduino_fft: https://github.com/lloydroc/arduino_fft
-// change to float data， modify to fit within this file 
+// change to float data， modify to fit within this file
 // see the above link for license( MIT license).
 #include <stdio.h>
 #include <math.h>
@@ -108,16 +108,16 @@ static void *mfcc_malloc(size_t size)
 	memset(p, 0, size);
 	return p;
 }
-	
+
 static void mfcc_free(void*p){
 	if(p!=NULL) free(p);
 }
 
-mfcc_t *mfcc_create(int num_mfcc_features, int feature_offset, int num_fbank, int frame_len, float preempha, int is_append_energy) 
+mfcc_t *mfcc_create(int num_mfcc_features, int feature_offset, int num_fbank, int frame_len, float preempha, int is_append_energy)
 {
 	mfcc_t * mfcc;
 	mfcc = mfcc_malloc(sizeof(mfcc_t));
-	
+
 	mfcc->num_mfcc_features = num_mfcc_features;
 	mfcc->num_features_offset = feature_offset;
 	mfcc->num_fbank = num_fbank;
@@ -132,7 +132,7 @@ mfcc_t *mfcc_create(int num_mfcc_features, int feature_offset, int num_fbank, in
 	mfcc->buffer = mfcc_malloc(sizeof(float)* mfcc->frame_len_padded);
 	mfcc->mel_energies = mfcc_malloc(sizeof(float)*mfcc->num_fbank );
 
-	//create window function, hanning 
+	//create window function, hanning
 	mfcc->window_func = mfcc_malloc(sizeof(float)*frame_len);
 	for (int i = 0; i < frame_len; i++)
 		mfcc->window_func[i] = 0.5f - 0.5f*cosf((float)M_2PI * ((float)i) / (frame_len));
@@ -178,7 +178,7 @@ void mfcc_delete(mfcc_t* mfcc)
 	mfcc_free(mfcc);
 }
 
-float * create_dct_matrix(int32_t input_length, int32_t coefficient_count) 
+float * create_dct_matrix(int32_t input_length, int32_t coefficient_count)
 {
 	int32_t k, n;
 	float * M = mfcc_malloc(sizeof(float) * input_length * coefficient_count);
@@ -188,9 +188,9 @@ float * create_dct_matrix(int32_t input_length, int32_t coefficient_count)
 #else
 	normalizer = sqrtf(2.0f/(float)input_length);
 #endif
-	for (k = 0; k < coefficient_count; k++) 
+	for (k = 0; k < coefficient_count; k++)
 	{
-		for (n = 0; n < input_length; n++) 
+		for (n = 0; n < input_length; n++)
 		{
 			M[k*input_length+n] = normalizer * cosf( ((float)M_PI)/input_length * (n + 0.5f) * k );
 		}
@@ -198,75 +198,46 @@ float * create_dct_matrix(int32_t input_length, int32_t coefficient_count)
 	return M;
 }
 
-float ** create_mel_fbank(mfcc_t *mfcc) {
-
-  int32_t bin, i;
-
-  int32_t num_fft_bins = mfcc->frame_len_padded/2;
-  float fft_bin_width = ((float)SAMP_FREQ) / mfcc->frame_len_padded;
-  float mel_low_freq = MelScale(MEL_LOW_FREQ);
-  float mel_high_freq = MelScale(MEL_HIGH_FREQ); 
-  float mel_freq_delta = (mel_high_freq - mel_low_freq) / (mfcc->num_fbank +1);
-
-  float *this_bin = mfcc_malloc(sizeof(float) * num_fft_bins);
-
-  float ** mel_fbank =  mfcc_malloc(sizeof(float*) * mfcc->num_fbank);
-
-  for (bin = 0; bin < mfcc->num_fbank ; bin++) {
-
-    float left_mel = mel_low_freq + bin * mel_freq_delta;
-    float center_mel = mel_low_freq + (bin + 1) * mel_freq_delta;
-    float right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
-
-    int32_t first_index = -1, last_index = -1;
-
-    for (i = 0; i < num_fft_bins; i++) {
-
-      float freq = (fft_bin_width * i);  // center freq of this fft bin.
-      float mel = MelScale(freq);
-      this_bin[i] = 0.0;
-
-      if (mel > left_mel && mel < right_mel) {
-        float weight;
-        if (mel <= center_mel) {
-          weight = (mel - left_mel) / (center_mel - left_mel);
-        } else {
-          weight = (right_mel-mel) / (right_mel-center_mel);
-        }
-        this_bin[i] = weight;
-        if (first_index == -1)
-          first_index = i;
-        last_index = i;
-      }
-    }
-
-    mfcc->fbank_filter_first[bin] = first_index;
-    mfcc->fbank_filter_last[bin] = last_index;
-	//size = size + size % 16;
-    mel_fbank[bin] = mfcc_malloc(sizeof(float) * ((size_t)last_index - first_index + 1));
-
-    int32_t j = 0;
-    //copy the part we care about
-    for (i = first_index; i <= last_index; i++) {
-		mel_fbank[bin][j++] = this_bin[i];
-    }
-  }
-  mfcc_free(this_bin);
-  return mel_fbank;
+// Builds num_fbank triangular mel filters; each row is a dense array of frame_len_padded/2+1 FFT-bin weights.
+float ** create_mel_fbank(mfcc_t *mfcc) {
+    // compute points evenly spaced in mels
+    float mel_low_freq = MelScale(MEL_LOW_FREQ);
+    float mel_high_freq = MelScale(MEL_HIGH_FREQ);
+    float mel_freq_delta = (mel_high_freq - mel_low_freq) / (mfcc->num_fbank +1);
+
+    // num_fbank+2 edge points. The parentheses are required: "sizeof(float) * num_fbank+2"
+    // reserved only two extra bytes, so the loop below wrote past the end of the buffer.
+    float * bin =  mfcc_malloc(sizeof(float) * (mfcc->num_fbank+2));
+    for (int i=0; i<mfcc->num_fbank+2; i++)
+    {
+        bin[i] = mel_low_freq + mel_freq_delta*i;
+        bin[i] = floor((mfcc->frame_len_padded+1)*InverseMelScale(bin[i])/SAMP_FREQ); // mel -> Hz -> FFT bin
+    }
+
+    // mfcc_malloc zero-fills each row, so only the rising and falling slopes are written
+    float ** mel_fbank =  mfcc_malloc(sizeof(float*) * mfcc->num_fbank);
+    for (int j=0; j<mfcc->num_fbank; j++) {
+        mel_fbank[j] = mfcc_malloc(sizeof(float) * (mfcc->frame_len_padded/2+1));
+        for (int i=(int)bin[j]; i<(int)bin[j+1]; i++)
+            mel_fbank[j][i] = (i - bin[j]) / (bin[j+1]-bin[j]);
+        for (int i=(int)bin[j+1]; i<(int)bin[j+2]; i++)
+            mel_fbank[j][i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1]);
+    }
+    mfcc_free(bin);
+    return mel_fbank;
+}
 
-void mfcc_compute(mfcc_t *mfcc, const int16_t * audio_data, float* mfcc_out) 
+void mfcc_compute(mfcc_t *mfcc, const int16_t * audio_data, float* mfcc_out)
 {
 	int32_t i, j, bin;
 
-	//1. TensorFlow way of normalizing .wav data to (-1,1) and 2. do pre-emphasis. 
-	float last = (float)audio_data[0] / (1 << 15);
-	mfcc->frame[0] = last;
+	//1. TensorFlow way of normalizing .wav data to (-1,1) and 2. do pre-emphasis.
+	float last = (float)audio_data[0];
+	mfcc->frame[0] = last / (1 << 15);
 	for (i = 1; i < mfcc->frame_len; i++) {
 		mfcc->frame[i] = ((float)audio_data[i] - last * mfcc->preempha) / (1<<15);
 		last = (float)audio_data[i];
 	}
-	
 	//Fill up remaining with zeros
 	if(mfcc->frame_len_padded - mfcc->frame_len)
 		memset(&mfcc->frame[mfcc->frame_len], 0, sizeof(float) * (mfcc->frame_len_padded - mfcc->frame_len));
@@ -291,8 +262,8 @@ void mfcc_compute(mfcc_t *mfcc, const int16_t * audio_data, float* mfcc_out)
 		mfcc->buffer[i] = real*real + im*im;
 	}
 	mfcc->buffer[0] = first_energy;
-	mfcc->buffer[half_dim] = last_energy;  
-	
+	mfcc->buffer[half_dim] = last_energy;
+
 #else // end of ARM_fft
 	// not yet optimized for memory
 	float *data_re = mfcc->fft_buffer;
@@ -304,54 +275,44 @@ void mfcc_compute(mfcc_t *mfcc, const int16_t * audio_data, float* mfcc_out)
 	fft(data_re, data_im, mfcc->frame_len_padded);
 	// only need half (N/2+1)
 	for (int i = 0; i <= mfcc->frame_len_padded/2; i++) {
-		mfcc->buffer[i] = data_re[i] * data_re[i] + data_im[i]* data_im[i];
+		mfcc->buffer[i] = (data_re[i] * data_re[i] + data_im[i]* data_im[i])/mfcc->frame_len_padded;
 	}
 #endif
 
 	float sqrt_data;
 	//Apply mel filterbanks
-	for (bin = 0; bin < mfcc->num_fbank ; bin++) 
-	{
-		j = 0;
+	for (bin = 0; bin < mfcc->num_fbank ; bin++)
+	{
 		float mel_energy = 0;
-		int32_t first_index = mfcc->fbank_filter_first[bin];
-		int32_t last_index = mfcc->fbank_filter_last[bin];
-		for (i = first_index; i <= last_index; i++) {
-			mel_energy += mfcc->buffer[i] * mfcc->mel_fbank[bin][j++];
+		for (i = 0; i < mfcc->frame_len_padded/2+1; i++) {
+			mel_energy += mfcc->buffer[i] * mfcc->mel_fbank[bin][i];
 		}
-		mfcc->mel_energies[bin] = mel_energy / mfcc->frame_len_padded;
+		mfcc->mel_energies[bin] = mel_energy;
 
 		//avoid log of zero
 		if (mel_energy == 0.0f)
 			mfcc->mel_energies[bin] = FLT_MIN;
-	}
+	}
 
 	//Take log
 	float total_energy = 0;
 	for (bin = 0; bin < mfcc->num_fbank; bin++)
 	{
 		total_energy += mfcc->mel_energies[bin];
-		mfcc->mel_energies[bin] = logf(mfcc->mel_energies[bin]);	
+		mfcc->mel_energies[bin] = logf(mfcc->mel_energies[bin]);
 	}
-
 	//Take DCT. Uses matrix mul.
 	int out_index = 0;
-	for (i = mfcc->num_features_offset; i < mfcc->num_mfcc_features; i++) 
+	for (i = mfcc->num_features_offset; i < mfcc->num_mfcc_features; i++)
 	{
 		float sum = 0.0;
-		for (j = 0; j < mfcc->num_fbank ; j++) 
+		for (j = 0; j < mfcc->num_fbank ; j++)
 		{
 			sum += mfcc->dct_matrix[i*mfcc->num_fbank +j] * mfcc->mel_energies[j];
 		}
 		mfcc_out[out_index] = sum;
 		out_index ++;
-	}
-
-	// whether replace the first energy by log of total energy
-	if (mfcc->is_append_energy)
-	{
-		mfcc_out[0] = logf(total_energy);
-	}
+	}
 
 }
 
diff --git a/examples/keyword_spotting/mfcc.h b/examples/keyword_spotting/mfcc.h
index dfef67a2..d19a14eb 100644
--- a/examples/keyword_spotting/mfcc.h
+++ b/examples/keyword_spotting/mfcc.h
@@ -23,7 +23,7 @@
 #define __MFCC_H__
 
 
-// in main.c define "PLATFORM_ARM" before including 'mfcc.h' to use ARM optimized FFT 
+// in main.c define "PLATFORM_ARM" before including 'mfcc.h' to use ARM optimized FFT
 #ifdef PLATFORM_ARM
 #include "arm_math.h"
 #define MFCC_PLATFORM_ARM
@@ -63,14 +63,14 @@ typedef struct _mfcc_t{
 } mfcc_t;
 
 static inline float InverseMelScale(float mel_freq) {
-  return 700.0f * (expf (mel_freq / 1127.0f) - 1.0f);
+  return 700.0f * (pow(10,(mel_freq / 2595.0f)) - 1.0f);
 }
 
 static inline float MelScale(float freq) {
-  return 1127.0f * logf (1.0f + freq / 700.0f);
+  return 2595.0f * log10(1.0f + freq / 700.0f);
 }
 
-float * create_dct_matrix(int32_t input_length, int32_t coefficient_count); 
+float * create_dct_matrix(int32_t input_length, int32_t coefficient_count);
 float ** create_mel_fbank(mfcc_t* mfcc);
 
 mfcc_t *mfcc_create(int num_mfcc_features, int feature_offset, int num_fbank, int frame_len, float preempha, int is_append_energy);
diff --git a/examples/keyword_spotting/mfcc.py b/examples/keyword_spotting/mfcc.py
index 0a046d79..a7906f07 100644
--- a/examples/keyword_spotting/mfcc.py
+++ b/examples/keyword_spotting/mfcc.py
@@ -27,8 +27,7 @@ def generate_mfcc(sig, rate, sig_len, noise=None, noise_weight=0.1, winlen=0.032
         if(len(sig) >sig_len):
             sig = sig[0:sig_len]
     # i dont know, 'tensorflow' normalization
-    sig = sig.astype('float') / 32768
-
+    sig = sig.astype('float32') / 32768
     if(noise is not None):
         noise = noise[random.randint(0, len(noise)-1)] # pick a noise
         start = random.randint(0, len(noise)-sig_len) # pick a sequence
@@ -59,7 +58,6 @@ def merge_mfcc_file(input_path='dat/', mix_noise=True, sig_len=16000, winlen=0.0
         test_list = f.read()
     with open(input_path +  'validation_list.txt', 'r') as f:
         validate_list = f.read()
-
     files = os.listdir(input_path)
     for fi in files:
         fi_d = os.path.join(input_path, fi)
@@ -68,6 +66,7 @@ def merge_mfcc_file(input_path='dat/', mix_noise=True, sig_len=16000, winlen=0.0
             label = fi_d.split('/')[1] # get the label from the dir
             print(label)
             # noise in training
+            
             if 'noise' in label:
                 for f in os.listdir(fi_d):
                     filename = f
@@ -76,6 +75,7 @@ def merge_mfcc_file(input_path='dat/', mix_noise=True, sig_len=16000, winlen=0.0
                     f = os.path.join(fi_d, f)
                     (rate, sig) = wav.read(f)
                     for i in range(0, len(sig), sig_len):
+                        
                         data = generate_mfcc(sig[i:i+sig_len], rate, sig_len, winlen=winlen, winstep=winstep, numcep=numcep,
                                              nfilt=nfilt, nfft=nfft, lowfreq=lowfreq,
                                              highfreq=highfreq, winfunc=winfunc, ceplifter=ceplifter, preemph=preemph)
@@ -87,22 +87,31 @@ def merge_mfcc_file(input_path='dat/', mix_noise=True, sig_len=16000, winlen=0.0
             # dataset
             for f in os.listdir(fi_d):
                 filename = f
+                
                 f = os.path.join(fi_d, f)
                 (rate, sig) = wav.read(f)
-                data = generate_mfcc(sig, rate, sig_len, noise=noise, winlen=winlen, winstep=winstep, numcep=numcep, nfilt=nfilt, nfft=nfft, lowfreq=lowfreq,
-                     highfreq=highfreq, winfunc=winfunc, ceplifter=ceplifter, preemph=preemph)
-                data = np.array(data) # ?? no idea why this works
 
                 # split dataset into train, test, validate
+                
                 if filename in test_list:
+                    data = generate_mfcc(sig, rate, sig_len, winlen=winlen, winstep=winstep, numcep=numcep, nfilt=nfilt, nfft=nfft, lowfreq=lowfreq, highfreq=highfreq, winfunc=winfunc, ceplifter=ceplifter, preemph=preemph)
+                    data = np.array(data) # ?? no idea why this works
+                    
                     test_data.append(data)
                     test_label.append(label)
+                
                 elif filename in validate_list:
+                    data = generate_mfcc(sig, rate, sig_len, winlen=winlen, winstep=winstep, numcep=numcep, nfilt=nfilt, nfft=nfft, lowfreq=lowfreq, highfreq=highfreq, winfunc=winfunc, ceplifter=ceplifter, preemph=preemph)
+                    data = np.array(data) # ?? no idea why this works
                     validate_data.append(data)
                     validate_label.append(label)
                 else:
+                    data = generate_mfcc(sig, rate, sig_len, noise=noise, winlen=winlen, winstep=winstep, numcep=numcep, nfilt=nfilt, nfft=nfft, lowfreq=lowfreq, highfreq=highfreq, winfunc=winfunc, ceplifter=ceplifter, preemph=preemph)
+                    data = np.array(data) # ?? no idea why this works
                     train_data.append(data)
                     train_lable.append(label)
+                
+
 
     # finalize
     train_data = np.array(train_data)
@@ -116,14 +125,14 @@ def merge_mfcc_file(input_path='dat/', mix_noise=True, sig_len=16000, winlen=0.0
 
     # test
     (x_train, y_train), (x_test, y_test), (x_val, y_val) = merge_mfcc_file()
-
+    
     np.save('train_data.npy', x_train)
     np.save('train_label.npy', y_train)
     np.save('test_data.npy', x_test)
     np.save('test_label.npy', y_test)
     np.save('val_data.npy', x_val)
     np.save('val_label.npy', y_val)
-
+    
     print('x_train shape:', x_train.shape, 'max', x_train.max(), 'min', x_train.min())
 
     mfcc_feat = x_train[3948]
diff --git a/examples/keyword_spotting/python_speech_features/base.py b/examples/keyword_spotting/python_speech_features/base.py
index acf72444..1afe8ee0 100644
--- a/examples/keyword_spotting/python_speech_features/base.py
+++ b/examples/keyword_spotting/python_speech_features/base.py
@@ -44,10 +44,13 @@ def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
     """
     nfft = nfft or calculate_nfft(samplerate, winlen)
     feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc)
+    #print(type(feat[0][0]))
+
     feat = numpy.log(feat)
-    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
+    feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]    
     feat = lifter(feat,ceplifter)
     if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
+
     return feat
 
 def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
@@ -70,15 +73,15 @@ def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
     """
     highfreq= highfreq or samplerate/2
     signal = sigproc.preemphasis(signal,preemph)
-    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc)
+    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc)  
     pspec = sigproc.powspec(frames,nfft)
+
+    
     energy = numpy.sum(pspec,1) # this stores the total energy in each frame
     energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log
-
     fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
     feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
     feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log
-
     return feat,energy
 
 def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
@@ -136,7 +139,7 @@ def hz2mel(hz):
     :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
     :returns: a value in Mels. If an array was passed in, an identical sized array is returned.
     """
-    return 2595 * numpy.log10(1+hz/700.)
+    return 2595 * numpy.log10(1+hz/700.).astype('float32')
 
 def mel2hz(mel):
     """Convert a value in Mels to Hertz
@@ -163,12 +166,12 @@ def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
     # compute points evenly spaced in mels
     lowmel = hz2mel(lowfreq)
     highmel = hz2mel(highfreq)
-    melpoints = numpy.linspace(lowmel,highmel,nfilt+2)
+    melpoints = numpy.linspace(lowmel,highmel,nfilt+2).astype('float32')
     # our points are in Hz, but we use fft bins, so we have to convert
     #  from Hz to fft bin number
-    bin = numpy.floor((nfft+1)*mel2hz(melpoints)/samplerate)
+    bin = numpy.floor((nfft+1)*mel2hz(melpoints)/samplerate).astype('float32')
 
-    fbank = numpy.zeros([nfilt,nfft//2+1])
+    fbank = numpy.zeros([nfilt,nfft//2+1]).astype('float32')
     for j in range(0,nfilt):
         for i in range(int(bin[j]), int(bin[j+1])):
             fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j])
diff --git a/examples/keyword_spotting/python_speech_features/sigproc.py b/examples/keyword_spotting/python_speech_features/sigproc.py
index a786c4fb..9aba4ea9 100644
--- a/examples/keyword_spotting/python_speech_features/sigproc.py
+++ b/examples/keyword_spotting/python_speech_features/sigproc.py
@@ -38,10 +38,13 @@ def framesig(sig, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,)), str
 
     padlen = int((numframes - 1) * frame_step + frame_len)
 
-    zeros = numpy.zeros((padlen - slen,))
+    zeros = numpy.zeros((padlen - slen,)).astype('float32')
     padsignal = numpy.concatenate((sig, zeros))
     if stride_trick:
-        win = winfunc(frame_len)
+        win = winfunc(frame_len).astype('float32')
+        for i in range (0,512):
+            win[i] = 0.5 - 0.5*numpy.cos(numpy.pi*2 * (i / 512))
+        win = win.astype('float32')
         frames = rolling_window(padsignal, window=frame_len, step=frame_step)
     else:
         indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile(
@@ -49,7 +52,6 @@ def framesig(sig, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,)), str
         indices = numpy.array(indices, dtype=numpy.int32)
         frames = padsignal[indices]
         win = numpy.tile(winfunc(frame_len), (numframes, 1))
-
     return frames * win
 
 
@@ -100,7 +102,7 @@ def magspec(frames, NFFT):
             'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.',
             numpy.shape(frames)[1], NFFT)
     complex_spec = numpy.fft.rfft(frames, NFFT)
-    return numpy.absolute(complex_spec)
+    return numpy.absolute(complex_spec).astype('float32')
 
 
 def powspec(frames, NFFT):
@@ -110,7 +112,7 @@ def powspec(frames, NFFT):
     :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
     :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame.
     """
-    return 1.0 / NFFT * numpy.square(magspec(frames, NFFT))
+    return 1.0 / NFFT * numpy.square(magspec(frames, NFFT)).astype('float32')
 
 
 def logpowspec(frames, NFFT, norm=1):
@@ -123,7 +125,7 @@ def logpowspec(frames, NFFT, norm=1):
     """
     ps = powspec(frames, NFFT);
     ps[ps <= 1e-30] = 1e-30
-    lps = 10 * numpy.log10(ps)
+    lps = 10 * numpy.log10(ps).astype('float32')
     if norm:
         return lps - numpy.max(lps)
     else:
@@ -137,4 +139,4 @@ def preemphasis(signal, coeff=0.95):
     :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95.
     :returns: the filtered signal.
     """
-    return numpy.append(signal[0], signal[1:] - coeff * signal[:-1])
+    return numpy.append(signal[0], signal[1:] - coeff * signal[:-1]).astype('float32')