-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHttpUtils.cc
281 lines (244 loc) · 7.76 KB
/
HttpUtils.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
/*
* Copyright ©2022 Hal Perkins. All rights reserved. Permission is
* hereby granted to students registered for University of Washington
* CSE 333 for use solely during Spring Quarter 2022 for purposes of
* the course. No other use, copying, distribution, or modification
* is permitted without prior written consent. Copyrights for
* third-party components of this work must be honored. Instructors
* interested in reusing these course materials should contact the
* author.
*/
// This file contains a number of HTTP and HTML parsing routines
// that come in useful throughout the assignment.
#include <arpa/inet.h>
#include <errno.h>
#include <limits.h>
#include <netdb.h>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string.hpp>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <iostream>
#include <vector>
#include "./HttpUtils.h"
using boost::algorithm::replace_all;
using std::cerr;
using std::cout;
using std::endl;
using std::map;
using std::pair;
using std::string;
using std::vector;
namespace hw4 {
// Returns true iff test_file resolves to a path strictly inside root_dir.
//
// Both arguments are canonicalized with realpath(), which resolves ".",
// "..", and symbolic links, so the containment check is performed on
// absolute paths.
//
// Returns false when either path cannot be resolved (realpath() returns
// NULL, e.g. because the path does not exist) or when test_file resolves
// to root_dir itself.
bool IsPathSafe(const string& root_dir, const string& test_file) {
  // With a NULL second argument, realpath() malloc()s the result buffer;
  // we own it and must free() it on every path out of this function.
  char* resolved_file = realpath(test_file.c_str(), nullptr);
  if (resolved_file == nullptr) {
    return false;
  }

  char* resolved_root = realpath(root_dir.c_str(), nullptr);
  if (resolved_root == nullptr) {
    free(resolved_file);
    return false;
  }

  // Copy into std::string so the C buffers can be released immediately.
  const string file_path(resolved_file);
  string root_prefix(resolved_root);
  free(resolved_file);
  free(resolved_root);

  // Terminate the root with exactly one '/' so that "/a/b" is not treated
  // as a prefix of "/a/bc", and so that a root of "/" works as well.
  if (root_prefix.empty() || root_prefix.back() != '/') {
    root_prefix.push_back('/');
  }

  // The file is safe only if its path *starts with* "<root>/" and has at
  // least one more character after it.  Matching the root anywhere else
  // in the path would accept files outside the root.
  return file_path.size() > root_prefix.size() &&
         file_path.compare(0, root_prefix.size(), root_prefix) == 0;
}
// Returns a copy of `from` in which the five characters that are unsafe
// in HTML/XML text are replaced by their entity references:
//
//   &  ->  &amp;      "  ->  &quot;      '  ->  &apos;
//   <  ->  &lt;       >  ->  &gt;
//
// All other characters are copied through unchanged.
string EscapeHtml(const string& from) {
  string ret;
  ret.reserve(from.size());

  // A single left-to-right pass escapes each input character exactly
  // once, so the '&' that begins an emitted entity is never re-escaped.
  for (const char ch : from) {
    switch (ch) {
      case '&':
        ret += "&amp;";
        break;
      case '"':
        ret += "&quot;";
        break;
      case '\'':
        ret += "&apos;";
        break;
      case '<':
        ret += "&lt;";
        break;
      case '>':
        ret += "&gt;";
        break;
      default:
        ret += ch;
        break;
    }
  }
  return ret;
}
// Decodes a URI-encoded string.
//
//  - Each '+' becomes a space (convention used by old form encoders).
//  - Each "%XY" token, where X and Y are hex digits (either case), is
//    replaced by the character with that code, but only if
//    32 <= dec(XY) <= 127.
//  - A '%' that does not start such a token is copied through literally,
//    and decoding resumes with the character that follows it.
//  - Every other character is copied through unchanged.
string URIDecode(const string& from) {
  // Maps a hex digit to its value, or -1 if `ch` is not a hex digit.
  // Done with plain comparisons rather than toupper(): passing a negative
  // char (any byte >= 0x80 where char is signed) to toupper() is undefined.
  const auto hex_value = [](char ch) -> int {
    if (ch >= '0' && ch <= '9') return ch - '0';
    if (ch >= 'A' && ch <= 'F') return 10 + (ch - 'A');
    if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a');
    return -1;
  };

  string retstr;
  retstr.reserve(from.length());

  for (size_t pos = 0; pos < from.length(); pos++) {
    const char current = from[pos];

    // Special case the '+' for old encoders.
    if (current == '+') {
      retstr.push_back(' ');
      continue;
    }

    // Anything that is not '%' cannot start an escape sequence.
    if (current != '%') {
      retstr.push_back(current);
      continue;
    }

    // An escape needs two more characters.  Written as pos + 2 < length
    // (not pos < length - 2) to avoid unsigned wrap-around on short input.
    const int high = (pos + 2 < from.length()) ? hex_value(from[pos + 1]) : -1;
    const int low = (pos + 2 < from.length()) ? hex_value(from[pos + 2]) : -1;
    if (high < 0 || low < 0) {
      retstr.push_back(current);
      continue;
    }

    // Only decode codes in the accepted range; otherwise keep the '%'.
    const int code = 16 * high + low;
    if (code < 32 || code > 127) {
      retstr.push_back(current);
      continue;
    }

    retstr.push_back(static_cast<char>(code));
    pos += 2;  // Skip the two hex digits that were just consumed.
  }
  return retstr;
}
// Parses `url` into its components.
//
// The raw URL is saved in url_.  The text before the first '?' is
// URI-decoded and saved in path_.  If a second '?'-separated piece
// exists, it is split on '&', and every chunk that splits on '=' into
// exactly two parts is URI-decoded and stored in args_ as field -> value.
// Chunks that do not have exactly that shape are ignored.
void URLParser::Parse(const string& url) {
  url_ = url;

  // Separate the path from the argument list.
  vector<string> url_parts;
  boost::split(url_parts, url, boost::is_any_of("?"));
  if (url_parts.empty()) {
    return;
  }

  // The first piece is always the path.
  path_ = URIDecode(url_parts[0]);

  // No '?' in the URL means there are no arguments to record.
  if (url_parts.size() >= 2) {
    // Break the argument list into its field=val chunks.
    vector<string> chunks;
    boost::split(chunks, url_parts[1], boost::is_any_of("&"));

    for (const string& chunk : chunks) {
      vector<string> field_and_value;
      boost::split(field_and_value, chunk, boost::is_any_of("="));

      // Only well-formed "field=value" chunks are recorded.
      if (field_and_value.size() != 2) {
        continue;
      }
      args_[URIDecode(field_and_value[0])] = URIDecode(field_and_value[1]);
    }
  }
}
// Returns a port number built from a base of 10000, plus an offset in
// [0, 25000) derived from the process id, plus an offset in [0, 5000)
// derived from rand().  The result therefore lies in [10000, 39998].
uint16_t GetRandPort() {
  const int pid_offset = static_cast<uint16_t>(getpid()) % 25000;
  const int rand_offset =
      static_cast<uint16_t>(rand()) % 5000;  // NOLINT(runtime/threadsafe_fn)
  return static_cast<uint16_t>(10000 + pid_offset + rand_offset);
}
// Calls read(fd, buf, read_len), retrying for as long as the call fails
// with errno set to EAGAIN or EINTR.  Returns whatever the first call
// that is not such a failure returns: the byte count read, 0, or -1 for
// any other error.
int WrappedRead(int fd, unsigned char* buf, int read_len) {
  for (;;) {
    const int bytes_read = read(fd, buf, read_len);

    // errno is only meaningful (and only consulted) after a failure.
    const bool transient_failure =
        (bytes_read == -1) && ((errno == EAGAIN) || (errno == EINTR));
    if (!transient_failure) {
      return bytes_read;
    }
  }
}
// Writes up to write_len bytes from buf to fd, calling write() repeatedly
// until everything has been written.  A write() that fails with EAGAIN or
// EINTR is retried; any other failure, or a write() that returns 0, stops
// the loop early.  Returns the number of bytes actually written, which is
// less than write_len if the loop stopped early.
int WrappedWrite(int fd, const unsigned char* buf, int write_len) {
  int total_written = 0;

  while (total_written < write_len) {
    const int chunk =
        write(fd, buf + total_written, write_len - total_written);
    const bool failed = (chunk == -1);

    // Transient failures: simply try the same write again.
    if (failed && ((errno == EAGAIN) || (errno == EINTR))) {
      continue;
    }

    // A hard error, or no progress at all: give up and report the count.
    if (failed || chunk == 0) {
      break;
    }

    total_written += chunk;
  }
  return total_written;
}
// Opens a TCP connection to host_name:port_num.
//
// The host is resolved with getaddrinfo() (IPv4 or IPv6, stream sockets
// only) and each returned address is tried in order until one connects.
//
// On success, stores the connected socket descriptor in *client_fd and
// returns true.  On failure, returns false and leaves *client_fd
// untouched; if the lookup itself failed, the reason is printed to cerr.
bool ConnectToServer(const string& host_name, uint16_t port_num,
                     int* client_fd) {
  // getaddrinfo() takes the port as a C-style string.
  char port_str[10];
  snprintf(port_str, sizeof(port_str), "%hu", port_num);

  // Accept either AF_INET or AF_INET6, but only SOCK_STREAM addresses.
  struct addrinfo hints;
  memset(&hints, 0, sizeof(hints));
  hints.ai_family = AF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;

  // Do the lookup.
  struct addrinfo* results = nullptr;
  const int ret_val =
      getaddrinfo(host_name.c_str(), port_str, &hints, &results);
  if (ret_val != 0) {
    cerr << "getaddrinfo failed: ";
    cerr << gai_strerror(ret_val) << endl;
    return false;
  }

  // Try each candidate address until one succeeds.
  bool connected = false;
  for (struct addrinfo* r = results; r != nullptr; r = r->ai_next) {
    const int client_sock = socket(r->ai_family, SOCK_STREAM, 0);
    if (client_sock == -1) {
      continue;
    }

    if (connect(client_sock, r->ai_addr, r->ai_addrlen) == -1) {
      // This socket is of no further use; close it so that a failed
      // attempt does not leak a file descriptor.
      close(client_sock);
      continue;
    }

    *client_fd = client_sock;
    connected = true;
    break;
  }

  freeaddrinfo(results);
  return connected;
}
} // namespace hw4