From 0813ccc525cd45c1e046a28672a9cfd6cc1ffe89 Mon Sep 17 00:00:00 2001
From: Pashmina Cameron
Date: Thu, 14 Nov 2019 12:19:45 +0000
Subject: [PATCH] Added a more general sequence LSTM implementation

---
 src/python/ksc/adbench_lstm/lstm2.py | 86 ++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 src/python/ksc/adbench_lstm/lstm2.py

diff --git a/src/python/ksc/adbench_lstm/lstm2.py b/src/python/ksc/adbench_lstm/lstm2.py
new file mode 100644
index 000000000..75c99c848
--- /dev/null
+++ b/src/python/ksc/adbench_lstm/lstm2.py
@@ -0,0 +1,86 @@
+"""
+There are many formulations of LSTMs. This code follows the formulation from
+https://cs224d.stanford.edu/lecture_notes/LectureNotes4.pdf with some simplifications.
+"""
+
+import numpy as np
+
+def sigmoid(x):
+    return 1.0 / (1.0 + np.exp(-x))
+
+# LSTM functions
+def lstm_step(x, hidden_state, cell_state, Wx, Wh, b):
+    """
+    Forward pass for a single timestep of an LSTM.
+    The input data has dimension D, the hidden state has dimension H, and we
+    process a minibatch of S sequences in parallel.
+    Inputs:
+    x: Input data, of shape (S, D)
+    hidden_state: Previous hidden state, of shape (S, H)
+    cell_state: Previous cell state, of shape (S, H)
+    Wx: Input-to-hidden weights, of shape (D, 4*H)
+    Wh: Hidden-to-hidden weights, of shape (H, 4*H)
+    b: Biases, of shape (4*H,)
+    Returns a tuple of:
+    next_hidden_state: Next hidden state, of shape (S, H)
+    next_cell_state: Next cell state, of shape (S, H)
+    """
+    _, hidden_size = hidden_state.shape
+    # Fused projection for all four gates; columns are [ingate | forget | outgate | change]
+    initial = hidden_state.dot(Wh) + x.dot(Wx) + b
+
+    ingate = sigmoid(initial[:, 0:hidden_size])
+    forget = sigmoid(initial[:, hidden_size:2*hidden_size])
+    outgate = sigmoid(initial[:, 2*hidden_size:3*hidden_size])
+    change = np.tanh(initial[:, 3*hidden_size:4*hidden_size])
+
+    next_cell_state = forget * cell_state + ingate * change
+    next_hidden_state = outgate * np.tanh(next_cell_state)
+
+    return next_hidden_state, next_cell_state
+
+def lstm(x, hidden_size):
+    """
+    Run an LSTM over T timesteps for a minibatch of S sequences, each with
+    input dimension D and hidden size H.
+    Inputs:
+    x: Input data of shape T x (S, D)
+    hidden_size: hidden dimension of the LSTM
+    Returns a tuple of:
+    all_hidden: Hidden states for all timesteps of all sequences, of shape T x (S, H)
+    cell_state: Last cell state, of shape (S, H)
+    """
+    if len(x) == 0:
+        return None, None
+
+    batch_size, input_dim = x[0].shape
+
+    # Zero initial state; random weights (for demonstration only)
+    hidden_state = np.zeros((batch_size, hidden_size), dtype=np.float64)
+    cell_state = np.zeros((batch_size, hidden_size), dtype=np.float64)
+    Wx = np.random.rand(input_dim, 4*hidden_size)
+    Wh = np.random.rand(hidden_size, 4*hidden_size)
+    b = np.zeros(4*hidden_size)
+
+    all_hidden = []
+    for xt in x:
+        next_hidden_state, next_cell_state = lstm_step(xt, hidden_state, cell_state, Wx, Wh, b)
+        hidden_state = next_hidden_state
+        cell_state = next_cell_state
+        all_hidden.append(next_hidden_state)
+
+    return all_hidden, cell_state
+
+
+### Test the LSTM with some sample data
+T, S, H, D = 17, 13, 7, 5
+x = [np.random.rand(S, D) for _ in range(T)]
+## Pass through an LSTM for T timesteps.
+# This will produce the following output:
+# minibatch size: 13, input dimension: 5, hidden size: 7, time steps: 17
+# input shape: (13, 5)
+# hidden state size: 17 x (13, 7) cell state size: (13, 7)
+print(f"minibatch size: {S}, input dimension: {D}, hidden size: {H}, time steps: {T}\n")
+print(f"input shape: {x[0].shape}")
+hidden_state, cell_state = lstm(x, H)
+print(f"hidden state size: {len(hidden_state)} x {hidden_state[0].shape} cell state size: {cell_state.shape}")
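
Reviewer note: below is a minimal sanity check for the gate layout in lstm_step.
It is a sketch, not part of the patch: it assumes the new file is on the import
path as lstm2, and re-derives each gate from the corresponding column block of
Wx, Wh and b, using the same [ingate | forget | outgate | change] ordering that
lstm_step slices out of the fused projection.

    import numpy as np
    from lstm2 import lstm_step, sigmoid

    np.random.seed(0)
    S, D, H = 3, 5, 7  # minibatch size, input dimension, hidden size
    x = np.random.rand(S, D)
    h = np.random.rand(S, H)
    c = np.random.rand(S, H)
    Wx = np.random.rand(D, 4 * H)
    Wh = np.random.rand(H, 4 * H)
    b = np.random.rand(4 * H)

    next_h, next_c = lstm_step(x, h, c, Wx, Wh, b)

    # k-th column block of a fused weight matrix (k = 0..3)
    def block(W, k):
        return W[:, k * H:(k + 1) * H]

    # Recompute each gate from its own block, in lstm_step's gate order
    i = sigmoid(h.dot(block(Wh, 0)) + x.dot(block(Wx, 0)) + b[0:H])
    f = sigmoid(h.dot(block(Wh, 1)) + x.dot(block(Wx, 1)) + b[H:2 * H])
    o = sigmoid(h.dot(block(Wh, 2)) + x.dot(block(Wx, 2)) + b[2 * H:3 * H])
    g = np.tanh(h.dot(block(Wh, 3)) + x.dot(block(Wx, 3)) + b[3 * H:4 * H])

    assert np.allclose(next_c, f * c + i * g)
    assert np.allclose(next_h, o * np.tanh(next_c))

The fused (D, 4*H) and (H, 4*H) layout trades four small matrix multiplies per
projection for one larger one, which is why lstm_step computes the projection
once and slices it.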