-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtimeGRiD.cu
193 lines (162 loc) · 9.04 KB
/
timeGRiD.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
/***
nvcc -std=c++11 -o timeGRiD.exe timeGRiD.cu -gencode arch=compute_86,code=sm_86 -O3 -ftz=true -prec-div=false -prec-sqrt=false
***/
#include "util/experiment_helpers.h" // include constants and other experiment consistency helpers
#include "grid.cuh"
dim3 dimms(grid::SUGGESTED_THREADS,1,1); // all loops are single loops (all mat mult flattened into column opps)
#define GRAVITY 9.81
template <typename T, int TEST_ITERS>
__host__
void test(int NUM_TIMESTEPS, cudaStream_t *streams, grid::robotModel<T> *d_robotModel, grid::gridData<T> *hd_data){
#if TEST_FOR_EQUIVALENCE
printf("q,qd,u\n");
printMat<T,1,grid::NUM_JOINTS>(hd_data->h_q_qd_u,1);
printMat<T,1,grid::NUM_JOINTS>(&hd_data->h_q_qd_u[grid::NUM_JOINTS],1);
printMat<T,1,grid::NUM_JOINTS>(&hd_data->h_q_qd_u[2*grid::NUM_JOINTS],1);
grid::inverse_dynamics<T,false,true>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
grid::direct_minv<T,true>(hd_data,d_robotModel,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
grid::forward_dynamics<T>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
grid::inverse_dynamics_gradient<T,false,true>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
grid::forward_dynamics_gradient<T,false>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
printf("c\n");
printMat<T,1,grid::NUM_JOINTS>(hd_data->h_c,1);
printf("Minv\n");
printMat<T,grid::NUM_JOINTS,grid::NUM_JOINTS>(hd_data->h_Minv,grid::NUM_JOINTS);
printf("qdd\n");
printMat<T,1,grid::NUM_JOINTS>(hd_data->h_qdd,1);
printf("dc_dq\n");
printMat<T,grid::NUM_JOINTS,grid::NUM_JOINTS>(hd_data->h_dc_du,grid::NUM_JOINTS);
printf("dc_dqd\n");
printMat<T,grid::NUM_JOINTS,grid::NUM_JOINTS>(&hd_data->h_dc_du[grid::NUM_JOINTS*grid::NUM_JOINTS],grid::NUM_JOINTS);
printf("df_dq\n");
printMat<T,grid::NUM_JOINTS,grid::NUM_JOINTS>(hd_data->h_df_du,grid::NUM_JOINTS);
printf("df_dqd\n");
printMat<T,grid::NUM_JOINTS,grid::NUM_JOINTS>(&hd_data->h_df_du[grid::NUM_JOINTS*grid::NUM_JOINTS],grid::NUM_JOINTS);
#else
// Setup timer
struct timespec start, end;
std::vector<double> times = {};
if(NUM_TIMESTEPS == 1){
// first one is done twice to wake up the GPU and get it up to full speed
grid::inverse_dynamics_single_timing<T,false,true>(hd_data,d_robotModel,GRAVITY,TEST_ITERS,dim3(1,1,1),dimms,streams);
grid::inverse_dynamics_single_timing<T,false,true>(hd_data,d_robotModel,GRAVITY,TEST_ITERS,dim3(1,1,1),dimms,streams);
grid::direct_minv_single_timing<T,true>(hd_data,d_robotModel,TEST_ITERS,dim3(1,1,1),dimms,streams);
grid::forward_dynamics_single_timing<T>(hd_data,d_robotModel,GRAVITY,TEST_ITERS,dim3(1,1,1),dimms,streams);
grid::inverse_dynamics_gradient_single_timing<T,false,true>(hd_data,d_robotModel,GRAVITY,TEST_ITERS,dim3(1,1,1),dimms,streams);
grid::forward_dynamics_gradient_single_timing<T,false>(hd_data,d_robotModel,GRAVITY,TEST_ITERS,dim3(1,1,1),dimms,streams);
}
else{
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::inverse_dynamics<T,false,true>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: ID WITH MEMORY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::inverse_dynamics_compute_only<T,false,true>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: ID COMPUTE ONLY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::direct_minv<T,true>(hd_data,d_robotModel,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: Minv WITH MEMORY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::direct_minv_compute_only<T,true>(hd_data,d_robotModel,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: Minv COMPUTE ONLY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::forward_dynamics<T>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: FD WITH MEMORY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::forward_dynamics_compute_only<T>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: FD COMPUTE ONLY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::inverse_dynamics_gradient<T,false,true>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: ID_DU WITH MEMORY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::inverse_dynamics_gradient_compute_only<T,false,true>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: ID_DU COMPUTE ONLY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::forward_dynamics_gradient<T,false>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms,streams);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: FD_DU WITH MEMORY: ",NUM_TIMESTEPS); printStats(×); times.clear();
for(int iter = 0; iter < TEST_ITERS; iter++){
clock_gettime(CLOCK_MONOTONIC,&start);
grid::forward_dynamics_gradient_compute_only<T,false>(hd_data,d_robotModel,GRAVITY,NUM_TIMESTEPS,dim3(NUM_TIMESTEPS,1,1),dimms);
clock_gettime(CLOCK_MONOTONIC,&end);
times.push_back(time_delta_us_timespec(start,end));
}
printf("[N:%d]: FD_DU COMPUTE ONLY: ",NUM_TIMESTEPS); printStats(×); times.clear();
}
#endif
}
template<typename T, int TEST_ITERS>
void run_all_tests(){
// allocate memory for max of what we need
const int MAX_TIMESTEPS = 256;
cudaStream_t *streams = grid::init_grid<T>();
grid::robotModel<T> *d_robotModel = grid::init_robotModel<T>();
grid::gridData<T> *hd_data = grid::init_gridData<T,MAX_TIMESTEPS>();
// load q,qd,u
for(int k = 0; k < MAX_TIMESTEPS; k++){
for(int ind = 0; ind < grid::NUM_JOINTS; ind++){
// get values
T val1 = getRand<double>(); T val2 = getRand<double>(); T val3 = getRand<double>();
hd_data->h_q_qd_u[k*3*grid::NUM_JOINTS + ind] = val1;
hd_data->h_q_qd_u[k*3*grid::NUM_JOINTS + grid::NUM_JOINTS + ind] = val2;
hd_data->h_q_qd_u[k*3*grid::NUM_JOINTS + 2*grid::NUM_JOINTS + ind] = val3;
// load into alternate memory sizes
hd_data->h_q_qd[k*2*grid::NUM_JOINTS + ind] = val1;
hd_data->h_q_qd[k*2*grid::NUM_JOINTS + grid::NUM_JOINTS + ind] = val2;
hd_data->h_q[k*grid::NUM_JOINTS + ind] = val1;
}
}
// copy values onto the GPU as default values (we will do more transfers later but this ensures things are initialized)
gpuErrchk(cudaMemcpy(hd_data->d_q_qd_u,hd_data->h_q_qd_u,3*grid::NUM_JOINTS*MAX_TIMESTEPS*sizeof(T),cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(hd_data->d_q_qd,hd_data->h_q_qd,2*grid::NUM_JOINTS*MAX_TIMESTEPS*sizeof(T),cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(hd_data->d_q,hd_data->h_q,grid::NUM_JOINTS*MAX_TIMESTEPS*sizeof(T),cudaMemcpyHostToDevice));
gpuErrchk(cudaDeviceSynchronize());
// then run the tests
test<T,TEST_ITERS*10>(1,streams,d_robotModel,hd_data); // more iters for single test
#if !TEST_FOR_EQUIVALENCE
test<T,TEST_ITERS>(16,streams,d_robotModel,hd_data);
test<T,TEST_ITERS>(32,streams,d_robotModel,hd_data);
test<T,TEST_ITERS>(64,streams,d_robotModel,hd_data);
test<T,TEST_ITERS>(128,streams,d_robotModel,hd_data);
test<T,TEST_ITERS>(256,streams,d_robotModel,hd_data);
#endif
// free all memory and exit
grid::close_grid<T>(streams,d_robotModel,hd_data);
}
int main(void){
run_all_tests<float,TEST_ITERS_GLOBAL>(); return 0;
}