# stock_price_prediction.py

# -*- coding: utf-8 -*-
"""stock_price_prediction.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1i22IFqYBsJU4khJgNlNAi3XQgr2zDbez

# Recurrent Neural Network

## Part 1 - Data Preprocessing

### Importing the libraries
"""

# importing essential libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

"""### Importing the training set"""

# Load the training data. Only the column at position 1 (the second column of
# the CSV) is used; the 1:2 slice keeps the result two-dimensional, which is
# the layout the scaler is fitted on further down.
dataset_train = pd.read_csv('file name in csv')
training_set = dataset_train.iloc[:, 1:2].to_numpy()

"""### Feature Scaling"""

# Rescale the training values into the [0, 1] range: the raw numbers are large,
# which makes the computation harder and raises the risk of exploding gradients.
# `sc` is kept around because it is reused later to transform the test inputs
# and to map the predictions back to the original scale.
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)

"""### Creating a data structure with 60 timesteps and 1 output"""

# Turn the single scaled column into supervised-learning samples: every input
# row holds the previous `n_timesteps` values and the target is the value that
# immediately follows them.
# 60 is the look-back window chosen here; other window sizes can be tried to
# see what works best for a given dataset.
n_timesteps = 60
# Take the upper bound from the data instead of a hard-coded row count, so that
# a dataset of any length is used in full and never indexed out of range.
n_rows = len(training_set_scaled)
if n_rows <= n_timesteps:
    raise ValueError(
        "Need more than {} rows to build training windows, got {}".format(
            n_timesteps, n_rows
        )
    )
X_train = np.array(
    [training_set_scaled[i - n_timesteps:i, 0] for i in range(n_timesteps, n_rows)]
)
# Targets are simply the series shifted by the window length.
y_train = training_set_scaled[n_timesteps:, 0].copy()

"""### Reshaping"""

# An LSTM expects 3-D input of the form (batch_size, time_steps, dimension).
# Each timestep here carries one value (a single column of prices), so a
# trailing axis of size 1 is appended. With additional feature columns the
# last dimension would be the number of features instead.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))

"""## Part 2 - Building and Training the RNN

### Importing the Keras libraries and packages
"""

# importing the keras module essential for training the deep learning model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

"""### Initialising the RNN"""

# Architecture: four stacked LSTM layers of 50 units, each followed by 20%
# dropout for regularisation, and a single-unit dense output layer.
regressor = Sequential()

# Per-layer options for the LSTM stack:
#   * the first layer declares the input shape (timesteps, 1 feature);
#   * the first three layers return the full sequence so the next LSTM can
#     consume it;
#   * the last layer returns only its final output, which feeds the Dense layer.
lstm_layer_options = [
    {'return_sequences': True, 'input_shape': (X_train.shape[1], 1)},
    {'return_sequences': True},
    {'return_sequences': True},
    {},
]
for layer_options in lstm_layer_options:
    regressor.add(LSTM(units=50, **layer_options))
    regressor.add(Dropout(0.2))

# Output layer: one value per sample (the next scaled price).
regressor.add(Dense(units=1))

"""### Compiling the RNN"""

# Configure training: Adam optimizer, mean squared error between the predicted
# and the actual (scaled) next value as the loss.
regressor.compile(loss='mean_squared_error', optimizer='adam')

"""### Fitting the RNN to the Training set"""

# 100 passes over the training windows, in mini-batches of 32 samples.
regressor.fit(x=X_train, y=y_train, batch_size=32, epochs=100)

"""## Part 3 - Making the predictions and visualising the results

### Getting the real stock price of 2017
"""

# Load the test data and keep the same column position (1) that was used for
# training, as a 2-D array. Presumably this is the reference the predictions
# are compared against later - confirm with the rest of the file.
dataset_test = pd.read_csv('Google_Stock_Price_Test.csv')
real_stock_price = dataset_test.iloc[:, 1:2].to_numpy()

"""### Getting the predicted stock price of 2017"""

# The test data has to be shaped the same way as the training data: one row of
# `window` previous values per prediction.
# The first test prediction needs the `window` values that precede it, and
# those sit at the end of the training data, so the train and test series are
# concatenated. The column is selected by position (1), matching how
# `training_set` and `real_stock_price` are extracted, instead of through a
# placeholder column name that raises KeyError until edited by hand.
dataset_total = pd.concat((dataset_train.iloc[:, 1], dataset_test.iloc[:, 1]), axis = 0)
# Look-back window the network was trained with (second axis of X_train).
window = X_train.shape[1]
# Keep the last `window` training values followed by every test value.
inputs = dataset_total.iloc[len(dataset_total) - len(dataset_test) - window:].values
inputs = inputs.reshape(-1, 1)
# Reuse the scaler fitted on the training data (transform only, no refit).
inputs = sc.transform(inputs)
# One window per test row; the bound follows the data, so the number of test
# rows no longer has to be exactly 20.
X_test = np.array([inputs[i - window:i, 0] for i in range(window, len(inputs))])
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)
# The network outputs scaled values; invert the scaling to get actual prices.
predicted_stock_price = sc.inverse_transform(predicted_stock_price)