-
Notifications
You must be signed in to change notification settings - Fork 0
/
stock_price_prediction.py
129 lines (91 loc) · 4.45 KB
/
stock_price_prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# -*- coding: utf-8 -*-
"""stock_price_prediction.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1i22IFqYBsJU4khJgNlNAi3XQgr2zDbez
# Recurrent Neural Network
## Part 1 - Data Preprocessing
### Importing the libraries
"""
# importing essential libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
"""### Importing the training set"""
# Load the training data. Only the column at position 1 is used as the price
# series; slicing with 1:2 (rather than indexing with 1) keeps the result as a
# 2-D array with a single column instead of a flat vector.
# NOTE(review): 'file name in csv' looks like a placeholder path - confirm it
# is replaced with the real training CSV before running.
dataset_train = pd.read_csv(filepath_or_buffer='file name in csv')
training_set = dataset_train.iloc[:, 1:2].to_numpy()
"""### Feature Scaling"""
# Rescale the prices into the [0, 1] interval before training; the raw values
# are large and the original author notes a risk of exploding gradients.
# The fitted scaler `sc` is kept at module level because later steps reuse it
# to transform the test inputs and to map predictions back to price units.
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)
"""### Creating a data structure with 60 timesteps and 1 output"""
# Turn the scaled price column into supervised samples: every sample holds the
# `timesteps` scaled prices that precede a row, and its target is the scaled
# price of that row. 60 is a tunable look-back length chosen by the author, not
# a property of the data - adjust it to suit your dataset.
timesteps = 60
# Stop at the number of rows actually loaded instead of a hard-coded 1258:
# the fixed bound raised IndexError on shorter data and silently dropped the
# extra rows on longer data.
n_rows = len(training_set_scaled)
if n_rows <= timesteps:
    raise ValueError(
        "need more than {} rows to build a training window, got {}".format(
            timesteps, n_rows
        )
    )
# X_train: one row per sample, `timesteps` columns. y_train: one value per sample.
X_train = np.array(
    [training_set_scaled[i - timesteps:i, 0] for i in range(timesteps, n_rows)]
)
y_train = np.array(
    [training_set_scaled[i, 0] for i in range(timesteps, n_rows)]
)
"""### Reshaping"""
# An LSTM layer takes 3-D input laid out as (batch_size, time_steps, features).
# This script feeds a single price column, so a trailing feature axis of size 1
# is appended; with more input features that last dimension would be the
# number of features instead.
sample_count = X_train.shape[0]
window_len = X_train.shape[1]
X_train = X_train.reshape((sample_count, window_len, 1))
"""## Part 2 - Building and Training the RNN
### Importing the Keras libraries and packages
"""
# Keras pieces needed for the model; imported here, next to their first use,
# as the rest of this notebook export does.
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

# Architecture: four stacked 50-unit LSTM layers, each followed by 20% dropout
# for regularisation, then a single-unit dense layer that emits the predicted
# (scaled) price.
regressor = Sequential([
    # First LSTM layer declares the input shape: (timesteps, 1 feature).
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    # Second and third LSTM layers keep return_sequences=True so that the
    # following LSTM layer still receives a full sequence.
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    # Fourth LSTM layer returns only its final output, which feeds the
    # dense output layer.
    LSTM(units=50),
    Dropout(0.2),
    # Output layer: one value per sample.
    Dense(units=1),
])

# Compile with the Adam optimiser and a mean-squared-error loss, then fit on
# the training windows.
regressor.compile(loss='mean_squared_error', optimizer='adam')
regressor.fit(X_train, y_train, batch_size=32, epochs=100)
"""## Part 3 - Making the predictions and visualising the results
### Getting the real stock price of 2017
"""
# Load the test CSV and keep the column at position 1 as a 2-D single-column
# array - the same column position used for the training set - so the
# predictions can be compared against these real prices.
dataset_test = pd.read_csv(filepath_or_buffer='Google_Stock_Price_Test.csv')
real_stock_price = dataset_test.iloc[:, 1:2].to_numpy()
"""### Getting the predicted stock price of 2017"""
# Build one look-back window per test row and predict the price that follows
# it. The test inputs must be shaped and scaled exactly like the training
# inputs the network was fitted on.

# Each test row needs the prices that precede it, and for the first test rows
# those live at the end of the training data - so the two price columns are
# concatenated. The column is selected by position 1, matching how
# training_set and real_stock_price are taken; the former 'column_name'
# placeholder raised KeyError unless a column literally had that name.
dataset_total = pd.concat(
    (dataset_train.iloc[:, 1], dataset_test.iloc[:, 1]), axis=0
)

# Use the window length the network was trained on: X_train was reshaped to
# (samples, timesteps, 1), so axis 1 is the number of timesteps.
timesteps = X_train.shape[1]

# Keep the test rows plus the `timesteps` rows immediately before them.
# .iloc makes the positional slice explicit (the concatenated index repeats).
first_input_row = len(dataset_total) - len(dataset_test) - timesteps
inputs = dataset_total.iloc[first_input_row:].values
inputs = inputs.reshape(-1, 1)
# Reuse the scaler fitted on the training prices so that test inputs are on
# the same scale as the data the network saw during training.
inputs = sc.transform(inputs)

# One window per test row. The bound comes from the data instead of the former
# hard-coded range(60, 80), which was only correct for exactly 20 test rows.
X_test = np.array(
    [inputs[i - timesteps:i, 0] for i in range(timesteps, len(inputs))]
)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)
# The network outputs scaled values; invert the scaling to get real prices.
predicted_stock_price = sc.inverse_transform(predicted_stock_price)