import numpy as np
# This is the only scipy method you are allowed to use
# Use of scipy is not allowed otherwise
from scipy.linalg import khatri_rao
import random as rnd
import time as tm
# SUBMIT YOUR CODE AS A SINGLE PYTHON (.PY) FILE INSIDE A ZIP ARCHIVE
# THE NAME OF THE PYTHON FILE MUST BE submit.py
# DO NOT INCLUDE PACKAGES LIKE SKLEARN, SCIPY, KERAS ETC IN YOUR CODE
# THE USE OF ANY MACHINE LEARNING LIBRARIES FOR WHATEVER REASON WILL RESULT IN A STRAIGHT ZERO
# THIS IS BECAUSE THESE PACKAGES CONTAIN SOLVERS WHICH MAKE THIS ASSIGNMENT TRIVIAL
# THE ONLY EXCEPTION TO THIS IS THE USE OF THE KHATRI-RAO PRODUCT METHOD FROM THE SCIPY LIBRARY
# HOWEVER, NOTE THAT NO OTHER SCIPY METHOD MAY BE USED IN YOUR CODE
# DO NOT CHANGE THE NAME OF THE METHODS solver, get_features, get_renamed_labels BELOW
# THESE WILL BE INVOKED BY THE EVALUATION SCRIPT. CHANGING THESE NAMES WILL CAUSE EVALUATION FAILURE
# You may define any new functions, variables, classes here
# For example, functions to calculate next coordinate or step length
################################
# Non Editable Region Starting #
################################
def get_renamed_labels( y ):
################################
#  Non Editable Region Ending  #
################################
    y_new = 1 - 2*y
    # Since the dataset contains 0/1 labels and SVMs prefer -1/+1 labels,
    # decide here how you want to rename the labels
    # For example, you may map 1 -> 1 and 0 -> -1, or else you may go with 1 -> -1 and 0 -> 1
    # Use whatever convention you see fit but use the same mapping throughout your code
    # If you use one mapping for train and another for test, you will get poor accuracy
    return y_new.reshape((y_new.size,))
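# Worked example of the mapping above (an illustrative note, not part of the original template):
# with y_new = 1 - 2*y, a label vector [0, 1, 1, 0] is renamed to [1, -1, -1, 1], i.e. 0 -> +1
# and 1 -> -1. The same convention must be used when reading predictions back, e.g. a positive
# score W^T x + B corresponds to an original label of 0.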
################################
# Non Editable Region Starting #
################################
def get_features( X ):
################################
#  Non Editable Region Ending  #
################################
    mapped_X = 1 - 2*X
    X = np.flip( np.cumprod( np.flip( mapped_X, axis = 1 ), axis = 1 ), axis = 1 )
    X = np.c_[ X, np.ones( X.shape[0] ) ]
    a = np.transpose( X )
    b = khatri_rao( a, a )
    X_new = np.transpose( khatri_rao( a, b ) )
    # Use this function to transform your input features (that are 0/1 valued)
    # into new features that can be fed into a linear model to solve the problem
    # Your new features may have a different dimensionality than the input features
    # For example, in this application, X will be 8 dimensional but your new
    # features can be 2 dimensional, 10 dimensional, 1000 dimensional, 123456 dimensional etc
    # Keep in mind that the more dimensions you use, the slower will be your solver too
    # so use only as many dimensions as are absolutely required to solve the problem
    return X_new
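# Shape walkthrough for the transformation above (a sketch based on the code, assuming the
# 8-dimensional 0/1 input described in the comments): mapping to +/-1 and taking reversed
# cumulative products gives 8 suffix-product features; appending the constant column makes 9;
# the first khatri_rao gives 9 * 9 = 81 pairwise products per point and the second gives
# 9 * 81 = 729 triple products, so get_features maps an (n, 8) input to an (n, 729) matrix.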
################################
# Non Editable Region Starting #
################################
def solver( X, y, timeout, spacing ):
    (n, d) = X.shape
    t = 0
    totTime = 0
    # W is the model vector and will get returned once timeout happens
    # B is the bias term that will get returned once timeout happens
    # The bias term is optional. If you feel you do not need a bias term at all, just keep it set to 0
    # However, if you do end up using a bias term, you are allowed to internally use a model vector
    # that hides the bias inside the model vector e.g. by defining a new variable such as
    # W_extended = np.concatenate( ( W, [B] ) )
    # However, you must maintain W and B variables separately as well so that they can get
    # returned when timeout happens. Take care to update W, B whenever you update your W_extended
    # variable, otherwise you will get wrong results.
    # Also note that the dimensionality of W may be larger or smaller than 9
    W = []
    B = 0
    tic = tm.perf_counter()
################################
#  Non Editable Region Ending  #
################################
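    # A minimal sketch of the W_extended packing mentioned above (illustrative only; the code
    # below keeps B fixed at 0 and does not use it):
    #   W_extended = np.concatenate( ( W, [B] ) )    # store the bias as the last coordinate
    #   W, B = W_extended[:-1], W_extended[-1]       # unpack W and B after every update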
    # You may reinitialize W, B to your liking here e.g. set W to its correct dimensionality
    # You may also define new variables here e.g. step_length, mini-batch size etc
    y = get_renamed_labels( y )
    X = get_features( X )
    (n, d) = X.shape
    W = np.zeros( d )
    eta = 0.12
    lambda_para = 0.0001
    itr = 0
################################
# Non Editable Region Starting #
################################
    while True:
        t = t + 1
        if t % spacing == 0:
            toc = tm.perf_counter()
            totTime = totTime + (toc - tic)
            if totTime > timeout:
                return ( W.reshape( ( W.size, ) ), B, totTime )
            else:
                tic = tm.perf_counter()
################################
#  Non Editable Region Ending  #
################################
        # Write all code to perform your method updates here within the infinite while loop
        # The infinite loop will terminate once timeout is reached
        # Do not try to bypass the timer check e.g. by using continue
        # It is very easy for us to detect such bypasses which will be strictly penalized
        # Note that most likely, you should be using get_features( X ) and get_renamed_labels( y )
        # in this part of the code instead of X and y -- please take care
        # Please note that once timeout is reached, the code will simply return W, B
        # Thus, if you wish to return the average model (as is sometimes done for GD),
        # you need to make sure that W, B store the averages at all times
        # One way to do so is to define "running" variables W_run, B_run
        # Make all GD updates to W_run e.g. W_run = W_run - step * delW (similarly for B_run)
        # Then use a running average formula to update W (similarly for B)
        # W = ( W * (t-1) + W_run ) / t
        # This way, W, B will always store the averages and can be returned at any time
        # In this scheme, W, B play the role of the "cumulative" variables in the course module optLib (see the cs771 library)
        # W_run, B_run, on the other hand, play the role of the "theta" variable in the course module optLib (see the cs771 library)
        # Stochastic (sub)gradient descent update for the L2-regularised hinge loss
        itr += 1
        # Sample one training point uniformly at random
        i = rnd.randint( 0, y.shape[0] - 1 )
        # Margin of the sampled point under the current model
        discriminant = y[i] * np.dot( X[i], W )
        if discriminant >= 1:
            # Correctly classified with sufficient margin -- only the regulariser contributes
            dw = lambda_para * W
        else:
            # Hinge loss is active -- the subgradient also includes the data term
            dw = lambda_para * W - y[i] * X[i]
        # Decaying step length of the form eta / sqrt(iteration)
        W = W - eta / np.sqrt( itr ) * dw

    return ( W.reshape( ( W.size, ) ), B, totTime )            # This return statement will never be reached
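
# A minimal local smoke test (illustrative only -- it is not invoked by the evaluation script,
# and names such as _X_demo are hypothetical). It fabricates random 0/1 data and labels purely
# to exercise the code paths above, runs the solver for a couple of seconds, and checks shapes.
if __name__ == "__main__":
    _n_demo = 200
    _X_demo = np.random.randint( 0, 2, size = ( _n_demo, 8 ) )
    _y_demo = np.random.randint( 0, 2, size = ( _n_demo, ) )
    _W, _B, _elapsed = solver( _X_demo, _y_demo, timeout = 2, spacing = 10 )
    print( "Model dimensionality:", _W.size, "| bias:", _B, "| time used:", round( _elapsed, 2 ) )
    # The learnt model must have one weight per transformed feature dimension
    assert _W.size == get_features( _X_demo ).shape[1]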