-
Notifications
You must be signed in to change notification settings - Fork 141
/
http1_parser.h
873 lines (788 loc) · 28.2 KB
/
http1_parser.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
#ifndef H_HTTP1_PARSER_H
/*
Copyright: Boaz Segev, 2017-2020
License: MIT
Feel free to copy, use and enjoy according to the license provided.
*/
/**
This is a callback based parser. It parses the skeleton of the HTTP/1.x protocol
and leaves most of the work (validation, error checks, etc') to the callbacks.
*/
#define H_HTTP1_PARSER_H
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
/* *****************************************************************************
Parser Settings
***************************************************************************** */
#ifndef HTTP_HEADERS_LOWERCASE
/**
* When non-zero, header names are converted to lowercase in place before the
* header callbacks run, and header names are compared with a case sensitive
* `memcmp`. When zero, names are left untouched and compared with
* `strncasecmp`.
*
* This is highly recommended, required by facil.io and helps with HTTP/2
* compatibility.
*/
#define HTTP_HEADERS_LOWERCASE 1
#endif
#ifndef HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING
/**
* When enabled, a synthetic `content-length` header holding the decoded body
* size is reported once a chunked body ends, unless a `content-length` header
* was already seen.
*/
#define HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING 1
#endif
#ifndef FIO_MEMCHAR
/** Prefer a custom memchr implementation. Usually memchr is better. */
#define FIO_MEMCHAR 0
#endif
#ifndef ALLOW_UNALIGNED_MEMORY_ACCESS
/**
* Performs some optimizations assuming unaligned memory access is okay.
*
* NOTE(review): the code visible in this file tests
* `HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED`, not this macro, and nothing visible
* here connects the two - confirm where (or whether) this setting is consumed.
*/
#define ALLOW_UNALIGNED_MEMORY_ACCESS 0
#endif
#ifndef HTTP1_PARSER_CONVERT_EOL2NUL
/**
* When non-zero, `seek2eol` overwrites the end-of-line bytes it finds ("\r\n"
* or "\n") with NUL bytes, so parsed lines become NUL terminated in place.
*/
#define HTTP1_PARSER_CONVERT_EOL2NUL 0
#endif
/* *****************************************************************************
Parser API
***************************************************************************** */
/**
* The parser's state. The nested `state` struct is written by the parser;
* callbacks should treat it as read-only.
*/
typedef struct http1_parser_s {
struct http1_parser_protected_read_only_state_s {
/* expected body length; while a chunked body is being decoded it holds
* minus the number of bytes still owed by the current chunk */
long long content_length; /* negative values indicate chunked data state */
ssize_t read; /* total number of bytes read so far (body only) */
/* NULL while parsing; set to the first byte after the message just before
* the on_request / on_response callback is invoked */
uint8_t *next; /* the known position for the end of request/response */
uint8_t reserved; /* for internal use (HTTP1_P_FLAG_* bits) */
} state;
} http1_parser_s;
/* Static initializer: zeroes the whole state, i.e. "expecting a new request /
* response line". Usage: `http1_parser_s p = HTTP1_PARSER_INIT;` */
#define HTTP1_PARSER_INIT \
{ \
{ 0 } \
}
/**
* Returns the amount of data actually consumed by the parser.
*
* The value 0 indicates there wasn't enough data to be parsed and the same
* buffer (with more data) should be resubmitted.
*
* A value smaller than the buffer size indicates that EITHER a request /
* response was detected OR that the leftover could not be consumed because more
* data was required.
*
* Simply resubmit the remainder of the data to continue parsing.
*
* A request / response callback automatically stops the parsing process,
* allowing the user to adjust or refresh the state of the data.
*
* On a protocol error (or when a callback returns non-zero) `http1_on_error`
* is called, the parser state is reset and `length` is returned.
*/
static size_t http1_parse(http1_parser_s *parser, void *buffer, size_t length);
/* *****************************************************************************
Required Callbacks (MUST be implemented by including file)
***************************************************************************** */
/*
* Return value convention (as used by the parser code in this file): a
* non-zero return from any callback except `http1_on_error` aborts parsing and
* triggers `http1_on_error`. The return value of `http1_on_error` is ignored.
* String arguments point into the buffer handed to `http1_parse` (or to a
* short-lived internal buffer) and are described by an explicit length.
*/
/** called when a request was received (the whole message was parsed). */
static int http1_on_request(http1_parser_s *parser);
/** called when a response was received (the whole message was parsed). */
static int http1_on_response(http1_parser_s *parser);
/** called when a request method is parsed. */
static int http1_on_method(http1_parser_s *parser, char *method,
size_t method_len);
/** called when a response status is parsed. the status_str is the string
* without the prefixed numerical status indicator.*/
static int http1_on_status(http1_parser_s *parser, size_t status,
char *status_str, size_t len);
/** called when a request path (excluding query) is parsed. */
static int http1_on_path(http1_parser_s *parser, char *path, size_t path_len);
/** called when a request query (the part after the `?`) is parsed; it is
* not called for an empty query. */
static int http1_on_query(http1_parser_s *parser, char *query,
size_t query_len);
/** called when the HTTP/1.x version is parsed. */
static int http1_on_version(http1_parser_s *parser, char *version, size_t len);
/** called when a header is parsed. */
static int http1_on_header(http1_parser_s *parser, char *name, size_t name_len,
char *data, size_t data_len);
/** called when a body chunk is parsed. */
static int http1_on_body_chunk(http1_parser_s *parser, char *data,
size_t data_len);
/** called when a protocol error occurred. */
static int http1_on_error(http1_parser_s *parser);
/* *****************************************************************************
Implementation Details
***************************************************************************** */
#if HTTP_HEADERS_LOWERCASE
#define HEADER_NAME_IS_EQ(var_name, const_name, len) \
(!memcmp((var_name), (const_name), (len)))
#else
#define HEADER_NAME_IS_EQ(var_name, const_name, len) \
(!strncasecmp((var_name), (const_name), (len)))
#endif
#define HTTP1_P_FLAG_STATUS_LINE 1
#define HTTP1_P_FLAG_HEADER_COMPLETE 2
#define HTTP1_P_FLAG_COMPLETE 4
#define HTTP1_P_FLAG_CLENGTH 8
#define HTTP1_PARSER_BIT_16 16
#define HTTP1_PARSER_BIT_32 32
#define HTTP1_P_FLAG_CHUNKED 64
#define HTTP1_P_FLAG_RESPONSE 128
/* *****************************************************************************
Seeking for characters in a string
***************************************************************************** */
#if FIO_MEMCHAR
/**
 * Custom `memchr` style seek: advances `*buffer` to the first occurrence of
 * `c` within [`*buffer`, `limit`).
 *
 * This seems to be faster on some systems, especially for smaller distances.
 * On newer systems, `memchr` should be faster.
 *
 * @param buffer in/out cursor; left on the match, or on `limit` when the
 *               byte is missing.
 * @param limit  one past the last byte that may be examined.
 * @param c      the byte to look for.
 * @return 1 when `c` was found, 0 otherwise (including an empty range).
 */
static int seek2ch(uint8_t **buffer, register uint8_t *const limit,
                   const uint8_t c) {
  if (*buffer >= limit)
    return 0;
  if (**buffer == c) {
    return 1;
  }

#if !HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
  /* too short for this mess */
  if ((uintptr_t)limit <= 16 + ((uintptr_t)*buffer & (~(uintptr_t)7)))
    goto finish;

  /* align memory: test byte by byte up to the next 8 byte boundary */
  {
    const uint8_t *alignment =
        (uint8_t *)(((uintptr_t)(*buffer) & (~(uintptr_t)7)) + 8);
    if (*buffer < alignment)
      *buffer += 1; /* we already tested this char */
    if (limit >= alignment) {
      while (*buffer < alignment) {
        if (**buffer == c) {
          return 1;
        }
        *buffer += 1;
      }
    }
  }
  const uint8_t *limit64 = (uint8_t *)((uintptr_t)limit & (~(uintptr_t)7));
#else
  const uint8_t *limit64 = (uint8_t *)limit - 7;
#endif
  /* `c` replicated into every byte of a 64 bit word */
  uint64_t wanted1 = 0x0101010101010101ULL * c;
  for (; *buffer < limit64; *buffer += 8) {
    /* load through memcpy: reading the byte buffer through a `uint64_t *`
     * would violate the effective type (strict aliasing) rules */
    uint64_t word;
    memcpy(&word, *buffer, sizeof(word));
    /* a byte of `eq1` is 0xFF exactly where `word` matches `c` */
    const uint64_t eq1 = ~(word ^ wanted1);
    /* low 7 bits all set => carry into bit 7; combined with bit 7 itself
     * this flags a (possible) full match somewhere in the word */
    const uint64_t t0 = (eq1 & 0x7f7f7f7f7f7f7f7fllu) + 0x0101010101010101llu;
    const uint64_t t1 = (eq1 & 0x8080808080808080llu);
    if ((t0 & t1)) {
      break; /* the byte loop below pins down the exact position */
    }
  }
#if !HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
finish:
#endif
  while (*buffer < limit) {
    if (**buffer == c) {
      return 1;
    }
    (*buffer)++;
  }
  return 0;
}
#else
/**
 * Library (`memchr`) based seek: moves `*pos` to the first occurrence of `ch`
 * within [`*pos`, `limit`). The buffer itself is never modified.
 *
 * Returns 1 when `ch` was found (`*pos` points at it). Returns 0 otherwise:
 * `*pos` is left untouched for an empty range and set to `limit` when the
 * range holds no match.
 */
inline static uint8_t seek2ch(uint8_t **pos, uint8_t *const limit, uint8_t ch) {
  uint8_t *cursor = *pos;
  if (cursor >= limit)
    return 0;
  if (*cursor != ch) {
    /* not right under the cursor - let the library scan the rest */
    cursor = memchr(cursor, ch, limit - cursor);
    if (!cursor) {
      *pos = limit;
      return 0;
    }
    *pos = cursor;
  }
  return 1;
}
#endif
/**
 * Moves `*pos` to the next '\n' within [`*pos`, `limit`) and reports the
 * length of the line terminator: 2 for "\r\n", 1 for a lone "\n", 0 when no
 * '\n' was found.
 *
 * When HTTP1_PARSER_CONVERT_EOL2NUL is non-zero the terminator bytes are
 * overwritten with NUL.
 *
 * The byte before the '\n' is inspected, so the '\n' must not be the very
 * first byte of the underlying buffer.
 */
inline static uint8_t seek2eol(uint8_t **pos, uint8_t *const limit) {
  /* single char lookup using memchr might be better when target is far... */
  if (!seek2ch(pos, limit, '\n'))
    return 0;
  const uint8_t eol_len = ((*pos)[-1] == '\r') ? 2 : 1;
#if HTTP1_PARSER_CONVERT_EOL2NUL
  (*pos)[0] = 0;
  if (eol_len == 2)
    (*pos)[-1] = 0;
#endif
  return eol_len;
}
/* *****************************************************************************
Change a letter to lower case (latin only)
***************************************************************************** */
/** Maps 'A'-'Z' to 'a'-'z'; every other byte value is returned unchanged. */
static uint8_t http_tolower(uint8_t c) {
  const int is_upper = (c >= 'A' && c <= 'Z');
  /* in ASCII the lowercase letter is the uppercase one with bit 5 set */
  return is_upper ? (uint8_t)(c | 32) : c;
}
/* *****************************************************************************
String to Number
***************************************************************************** */
/**
 * Converts a string to a number using base 10.
 *
 * Leading spaces, tabs and form feeds are skipped; any run of '+' / '-' signs
 * is accepted, each '-' flipping the sign. Parsing stops at the first
 * non-digit. Values that do not fit are clamped to the largest signed value
 * (negated when the sign is negative).
 *
 * When `end` isn't NULL, `*end` receives the position where parsing stopped.
 */
static long long http1_atol(const uint8_t *buf, const uint8_t **end) {
  const unsigned long long signed_max = (~0ULL) >> 1;
  unsigned long long value = 0;
  uint8_t negative = 0;
  /* leading white space */
  for (; *buf == ' ' || *buf == '\t' || *buf == '\f'; ++buf)
    ;
  /* sign characters */
  for (; *buf == '-' || *buf == '+'; ++buf) {
    if (*buf == '-')
      negative = !negative;
  }
  /* digits - stop accumulating once another digit might not fit */
  for (;;) {
    if (value > signed_max / 10)
      break;
    if (*buf < '0' || *buf > '9')
      break;
    value = (value * 10) + (unsigned long long)(*buf - '0');
    ++buf;
  }
  /* overflow: either past the signed range or digits were left unread */
  if (value > signed_max || (*buf >= '0' && *buf <= '9'))
    value = signed_max;
  if (negative)
    value = 0ULL - value;
  if (end)
    *end = buf;
  return (long long)value;
}
/**
 * Converts a string to a number using base 16.
 *
 * Accepted, in order: up to 32 blanks (space, tab, form feed), up to 32 sign
 * characters (each '-' flips the sign), one optional '0', one optional
 * 'x' / 'X', up to 32 further '0' characters and finally hex digits, which
 * are consumed until a non-hex byte is met or the top 4 bits of the value are
 * in use. This bounds the number of bytes examined.
 *
 * When `end` isn't NULL, `*end` receives the position where parsing stopped.
 */
static long long http1_atol16(const uint8_t *buf, const uint8_t **end) {
  const unsigned long long top_nibble = ~((~(0ULL)) >> 4);
  unsigned long long value = 0;
  uint8_t negative = 0;
  int budget;
  /* leading white space */
  budget = 32;
  while ((*buf == ' ' || *buf == '\t' || *buf == '\f') && budget > 0) {
    ++buf;
    --budget;
  }
  /* sign characters */
  budget = 32;
  while ((*buf == '-' || *buf == '+') && budget > 0) {
    if (*buf == '-')
      negative = !negative;
    ++buf;
    --budget;
  }
  /* optional "0x" style prefix (each part is optional on its own) */
  if (*buf == '0')
    ++buf;
  if ((*buf | 32) == 'x')
    ++buf;
  /* leading zeros */
  budget = 32;
  while ((*buf == '0') && budget > 0) {
    ++buf;
    --budget;
  }
  /* hex digits - stop before a shift would push bits out of the value */
  while (!(value & top_nibble)) {
    const uint8_t ch = *buf;
    unsigned long long digit;
    if (ch >= '0' && ch <= '9')
      digit = (unsigned long long)(ch - '0');
    else if ((ch | 32) >= 'a' && (ch | 32) <= 'f')
      digit = (unsigned long long)((ch | 32) - ('a' - 10));
    else
      break;
    value = (value << 4) | digit;
    ++buf;
  }
  if (negative)
    value = 0ULL - value;
  if (end)
    *end = buf;
  return (long long)value;
}
/* *****************************************************************************
HTTP/1.1 parser stages
***************************************************************************** */
/**
 * Parses a response status line ("HTTP/1.x <status> <reason>") found in
 * [`start`, `end`), where `end` is the first byte of the line terminator.
 *
 * Marks the parser as handling a response, then reports the version through
 * `http1_on_version` and the numeric status plus reason phrase through
 * `http1_on_status`.
 *
 * Returns 0 on success, -1 when a separating space is missing or a callback
 * returned non-zero.
 */
inline static int http1_consume_response_line(http1_parser_s *parser,
                                              uint8_t *start, uint8_t *end) {
  parser->state.reserved |= HTTP1_P_FLAG_RESPONSE;
  uint8_t *tmp = start;
  /* version: everything up to the first space */
  if (!seek2ch(&tmp, end, ' '))
    return -1;
  if (http1_on_version(parser, (char *)start, tmp - start))
    return -1;
  /* status code: up to the second space */
  tmp = start = tmp + 1;
  if (!seek2ch(&tmp, end, ' '))
    return -1;
  /* The reason phrase starts after the space. Its length is measured from
   * that same position - measuring from the space itself would include the
   * first byte of the line terminator. */
  uint8_t *const reason = tmp + 1;
  if (http1_on_status(parser, http1_atol(start, NULL), (char *)reason,
                      end - reason))
    return -1;
  return 0;
}
/**
* Parses a request line ("<method> <target> <version>") found in
* [`start`, `end`), where `end` is the first byte of the line terminator.
*
* Callbacks are invoked in this order: `http1_on_method`, `http1_on_path`,
* `http1_on_query` (only for a non-empty query), `http1_on_version` and - when
* the target was given in absolute form ("http://host/..." or
* "https://host/...") - `http1_on_header` with a synthetic "host" header.
*
* Returns 0 on success, -1 on a malformed line or when a callback returned
* non-zero.
*/
inline static int http1_consume_request_line(http1_parser_s *parser,
uint8_t *start, uint8_t *end) {
uint8_t *tmp = start;
/* host_start / host_end stay NULL unless the target is in absolute form */
uint8_t *host_start = NULL;
uint8_t *host_end = NULL;
/* method: everything up to the first space */
if (!seek2ch(&tmp, end, ' '))
return -1;
if (http1_on_method(parser, (char *)start, tmp - start))
return -1;
tmp = start = tmp + 1;
/* absolute form detection (lowercase scheme only) */
if (start[0] == 'h' && start[1] == 't' && start[2] == 't' &&
start[3] == 'p') {
if (start[4] == ':' && start[5] == '/' && start[6] == '/') {
/* Request URI is in long form... emulate Host header instead. */
tmp = host_end = host_start = (start += 7);
} else if (start[4] == 's' && start[5] == ':' && start[6] == '/' &&
start[7] == '/') {
/* Secure request is in long form... emulate Host header instead. */
tmp = host_end = host_start = (start += 8);
} else
goto review_path;
/* the target ends at the next space */
if (!seek2ch(&tmp, end, ' '))
return -1;
/* rewrites the byte that was just matched with the same value */
*tmp = ' ';
/* the host ends at the first '/' before that space */
if (!seek2ch(&host_end, tmp, '/')) {
/* no path after the host: report "/" and let the host span to the space */
if (http1_on_path(parser, (char *)"/", 1))
return -1;
goto start_version;
}
/* rewrites the byte that was just matched with the same value */
host_end[0] = '/';
start = host_end;
}
review_path:
tmp = start;
if (seek2ch(&tmp, end, '?')) {
/* path is everything before the '?', query is everything up to the space */
if (http1_on_path(parser, (char *)start, tmp - start))
return -1;
tmp = start = tmp + 1;
if (!seek2ch(&tmp, end, ' '))
return -1;
if (tmp - start > 0 && http1_on_query(parser, (char *)start, tmp - start))
return -1;
} else {
/* no query: the path runs up to the space */
tmp = start;
if (!seek2ch(&tmp, end, ' '))
return -1;
if (http1_on_path(parser, (char *)start, tmp - start))
return -1;
}
start_version:
/* tmp points at the space preceding the version */
start = tmp + 1;
if (start + 5 >= end) /* require "HTTP/" */
return -1;
if (http1_on_version(parser, (char *)start, end - start))
return -1;
/* report the host taken from an absolute form target, if any */
if (host_start && http1_on_header(parser, (char *)"host", 4,
(char *)host_start, host_end - host_start))
return -1;
return 0;
}
/**
 * Handles a `transfer-encoding` header: detects the `chunked` coding, flags
 * the parser for chunked body decoding and forwards whatever is left of the
 * value (if anything) to `http1_on_header`.
 *
 * `start`..`end_name` is the header name, `start_value`..`end` the value.
 *
 * - value is exactly `chunked`: the flag is set, no header is reported.
 * - value ends with a separate `chunked` token (as the RFC requires): the
 *   flag is set and the preceding codings are reported.
 * - (only with HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER) `chunked` elsewhere in
 *   the list: it is removed and a copy of the remaining list is reported.
 * - anything else: the value is reported untouched.
 *
 * Returns 0 on success, -1 when a callback returned non-zero (or the copied
 * list doesn't fit the internal buffer).
 */
#ifndef HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER
inline /* inline the function if it's short enough */
#endif
    static int
    http1_consume_header_transfer_encoding(http1_parser_s *parser,
                                           uint8_t *start, uint8_t *end_name,
                                           uint8_t *start_value, uint8_t *end) {
  /* this removes the `chunked` marker and prepares to "unchunk" the data */
  /* trim trailing separators */
  while (start_value < end && (end[-1] == ',' || end[-1] == ' '))
    --end;
  if ((end - start_value) == 7 &&
#if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
      (((uint32_t *)(start_value))[0] | 0x20202020) ==
          ((uint32_t *)"chun")[0] &&
      (((uint32_t *)(start_value + 3))[0] | 0x20202020) ==
          ((uint32_t *)"nked")[0]
#else
      ((start_value[0] | 32) == 'c' && (start_value[1] | 32) == 'h' &&
       (start_value[2] | 32) == 'u' && (start_value[3] | 32) == 'n' &&
       (start_value[4] | 32) == 'k' && (start_value[5] | 32) == 'e' &&
       (start_value[6] | 32) == 'd')
#endif
  ) {
    /* simple case, only `chunked` as a value */
    parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
    parser->state.content_length = 0;
    start_value += 7;
    while (start_value < end && (*start_value == ',' || *start_value == ' '))
      ++start_value;
    if (!(end - start_value))
      return 0;
  } else if ((end - start_value) > 7 &&
             /* `chunked` must be a token of its own: without this boundary
              * test a value such as `xchunked` would switch the parser to
              * chunked decoding */
             (end[-8] == ',' || end[-8] == ' ') &&
             ((end[(-7 + 0)] | 32) == 'c' && (end[(-7 + 1)] | 32) == 'h' &&
              (end[(-7 + 2)] | 32) == 'u' && (end[(-7 + 3)] | 32) == 'n' &&
              (end[(-7 + 4)] | 32) == 'k' && (end[(-7 + 5)] | 32) == 'e' &&
              (end[(-7 + 6)] | 32) == 'd')) {
    /* simple case, `chunked` at the end of list (RFC required) */
    parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
    parser->state.content_length = 0;
    end -= 7;
    while (start_value < end && (end[-1] == ',' || end[-1] == ' '))
      --end;
    if (!(end - start_value))
      return 0;
  }
#ifdef HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER /* RFC disallows this */
  else if ((end - start_value) > 7 && (end - start_value) < 256) {
    /* complex case, the `chunked` marker is in the middle of list */
    uint8_t val[256];
    size_t val_len = 0;
    while (start_value < end && val_len < 256) {
      if ((end - start_value) >= 7) {
        if (
#if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED
            (((uint32_t *)(start_value))[0] | 0x20202020) ==
                ((uint32_t *)"chun")[0] &&
            (((uint32_t *)(start_value + 3))[0] | 0x20202020) ==
                ((uint32_t *)"nked")[0]
#else
            ((start_value[0] | 32) == 'c' && (start_value[1] | 32) == 'h' &&
             (start_value[2] | 32) == 'u' && (start_value[3] | 32) == 'n' &&
             (start_value[4] | 32) == 'k' && (start_value[5] | 32) == 'e' &&
             (start_value[6] | 32) == 'd')
#endif
        ) {
          parser->state.reserved |= HTTP1_P_FLAG_CHUNKED;
          parser->state.content_length = 0;
          start_value += 7;
          /* skip comma / white space */
          while (start_value < end &&
                 (*start_value == ',' || *start_value == ' '))
            ++start_value;
          continue;
        }
      }
      /* copy value */
      while (start_value < end && val_len < 256 && start_value[0] != ',') {
        val[val_len++] = *start_value;
        ++start_value;
      }
      /* copy comma */
      if (start_value[0] == ',' && val_len < 256) {
        val[val_len++] = *start_value;
        ++start_value;
      }
      /* skip spaces */
      while (start_value < end && start_value[0] == ' ') {
        ++start_value;
      }
    }
    if (val_len < 256) {
      while (start_value < end && val_len < 256) {
        val[val_len++] = *start_value;
        ++start_value;
      }
      val[val_len] = 0;
    }
    /* perform callback with `val` or indicate error */
    if (val_len == 256 ||
        (val_len && http1_on_header(parser, (char *)start, (end_name - start),
                                    (char *)val, val_len)))
      return -1;
    return 0;
  }
#endif /* HTTP1_ALLOW_CHUNKED_IN_MIDDLE_OF_HEADER */
  /* perform callback */
  if (http1_on_header(parser, (char *)start, (end_name - start),
                      (char *)start_value, end - start_value))
    return -1;
  return 0;
}
/**
 * Handles a header that appears before the body.
 *
 * `start`..`end_name` is the header name, `start_value`..`end` the value.
 *
 * - `content-length`: ignored (not reported) once `chunked` was detected;
 *   otherwise the value is stored in `state.content_length`. A negative
 *   value, or a repeated header with a different value, is an error.
 * - `transfer-encoding` (only while no content length is stored): delegated to
 *   `http1_consume_header_transfer_encoding`.
 * - every header that isn't consumed above is reported via `http1_on_header`.
 *
 * NOTE(review): a `transfer-encoding` header arriving after a non-zero
 * `content-length` is not inspected for `chunked`; confirm this precedence is
 * intended.
 *
 * Returns 0 on success, -1 on error.
 */
inline static int http1_consume_header_top(http1_parser_s *parser,
                                           uint8_t *start, uint8_t *end_name,
                                           uint8_t *start_value, uint8_t *end) {
  if ((end_name - start) == 14 &&
#if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED && HTTP_HEADERS_LOWERCASE
      *((uint64_t *)start) == *((uint64_t *)"content-") &&
      *((uint64_t *)(start + 6)) == *((uint64_t *)"t-length")
#else
      HEADER_NAME_IS_EQ((char *)start, "content-length", 14)
#endif
  ) {
    /* handle the special `content-length` header */
    if ((parser->state.reserved & HTTP1_P_FLAG_CHUNKED))
      return 0; /* ignore if `chunked` */
    long long old_clen = parser->state.content_length;
    parser->state.content_length = http1_atol(start_value, NULL);
    if (parser->state.content_length < 0) {
      /* a body can't have a negative size - accepting it would make the body
       * stage treat the message as having no body at all */
      return -1;
    }
    if ((parser->state.reserved & HTTP1_P_FLAG_CLENGTH) &&
        old_clen != parser->state.content_length) {
      /* content-length header repeated with conflict */
      return -1;
    }
    parser->state.reserved |= HTTP1_P_FLAG_CLENGTH;
  } else if ((end_name - start) == 17 && (end - start_value) >= 7 &&
             !parser->state.content_length &&
#if HTTP1_UNALIGNED_MEMORY_ACCESS_ENABLED && HTTP_HEADERS_LOWERCASE
             *((uint64_t *)start) == *((uint64_t *)"transfer") &&
             *((uint64_t *)(start + 8)) == *((uint64_t *)"-encodin")
#else
             HEADER_NAME_IS_EQ((char *)start, "transfer-encoding", 17)
#endif
  ) {
    /* handle the special `transfer-encoding: chunked` header */
    return http1_consume_header_transfer_encoding(parser, start, end_name,
                                                  start_value, end);
  }
  /* perform callback */
  if (http1_on_header(parser, (char *)start, (end_name - start),
                      (char *)start_value, end - start_value))
    return -1;
  return 0;
}
/**
 * Handles a header that arrives after a chunked body (a trailer).
 *
 * Only `X-` prefixed names and the names in the white list below are reported
 * through `http1_on_header`; every other trailer is silently dropped.
 *
 * `start`..`end_name` is the header name, `start_value`..`end` the value.
 *
 * Returns 0 on success (including a dropped trailer), -1 when the callback
 * returned non-zero.
 */
inline static int http1_consume_header_trailer(http1_parser_s *parser,
                                               uint8_t *start,
                                               uint8_t *end_name,
                                               uint8_t *start_value,
                                               uint8_t *end) {
  /* white listed trailer names (built once, not on every call) */
  static const struct {
    const char *name;
    long len;
  } http1_trailer_white_list[] = {
      {"server-timing", 13}, /* specific for client data... */
      {NULL, 0},             /* end of list marker */
  };
  const long name_len = (long)(end_name - start);
  /* X- headers are allowed. The '-' is required, and the `| 32` keeps the
   * test working when header names aren't converted to lowercase. */
  int allowed =
      (name_len > 1 && (start[0] | 32) == 'x' && start[1] == '-');
  for (size_t i = 0; !allowed && http1_trailer_white_list[i].name; ++i) {
    allowed = (name_len == http1_trailer_white_list[i].len &&
               HEADER_NAME_IS_EQ((char *)start,
                                 http1_trailer_white_list[i].name,
                                 http1_trailer_white_list[i].len));
  }
  if (!allowed)
    return 0; /* header disallowed here - ignore it */
  /* perform callback */
  if (http1_on_header(parser, (char *)start, (end_name - start),
                      (char *)start_value, end - start_value))
    return -1;
  return 0;
}
/**
 * Parses a single header line found in [`start`, `end`), where `end` is the
 * first byte of the line terminator.
 *
 * The line is split at the first ':'. The name is converted to lowercase in
 * place when HTTP_HEADERS_LOWERCASE is non-zero, and leading spaces / tabs are
 * removed from the value. The header is then handed to the trailer handler
 * when body bytes were already read (`state.read` is non-zero) and to the
 * regular header handler otherwise.
 *
 * Returns 0 on success, -1 when the ':' is missing, when white space precedes
 * the ':' or when a handler failed.
 */
inline static int http1_consume_header(http1_parser_s *parser, uint8_t *start,
                                       uint8_t *end) {
  uint8_t *end_name = start;
  /* divide header name from data */
  if (!seek2ch(&end_name, end, ':'))
    return -1;
  /* white space between the name and the ':' is forbidden; only look at the
   * previous byte when it belongs to this line (the line might start with
   * ':' at the very beginning of the buffer) */
  if (end_name > start && (end_name[-1] == ' ' || end_name[-1] == '\t'))
    return -1;
#if HTTP_HEADERS_LOWERCASE
  for (uint8_t *t = start; t < end_name; t++) {
    *t = http_tolower(*t);
  }
#endif
  uint8_t *start_value = end_name + 1;
  /* clear away leading white space from value. */
  while (start_value < end &&
         (start_value[0] == ' ' || start_value[0] == '\t')) {
    start_value++;
  }
  return (parser->state.read ? http1_consume_header_trailer
                             : http1_consume_header_top)(
      parser, start, end_name, start_value, end);
}
/* *****************************************************************************
HTTP/1.1 Body handling
***************************************************************************** */
/**
 * Consumes body bytes for a message with a known `content-length`.
 *
 * Reports as much of the remaining body as the buffer holds (starting at
 * `*start`) through `http1_on_body_chunk`, advances `*start` and
 * `state.read`, and sets the COMPLETE flag once the whole body was read.
 *
 * `buffer` / `length` describe the buffer handed to `http1_parse`.
 *
 * Returns 0 on success, -1 when the callback returned non-zero.
 */
inline static int http1_consume_body_streamed(http1_parser_s *parser,
                                              void *buffer, size_t length,
                                              uint8_t **start) {
  uint8_t *const stop = ((uint8_t *)buffer) + length;
  /* Work with sizes rather than computing `*start + content_length`: the
   * length comes from the peer and may be far larger than the buffer, so the
   * pointer sum could overflow. */
  const long long remaining =
      parser->state.content_length - parser->state.read;
  size_t take = 0;
  if (remaining > 0 && *start < stop) {
    const size_t available = (size_t)(stop - *start);
    take = ((unsigned long long)remaining < available) ? (size_t)remaining
                                                       : available;
  }
  if (take && http1_on_body_chunk(parser, (char *)(*start), take))
    return -1;
  parser->state.read += (ssize_t)take;
  *start += take;
  if (parser->state.content_length <= parser->state.read)
    parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
  return 0;
}
/**
 * Decodes a `chunked` body.
 *
 * While inside a chunk, `state.content_length` holds minus the number of
 * bytes the chunk still owes; 0 means a chunk size line is expected next.
 * Chunk data is reported through `http1_on_body_chunk` and counted in
 * `state.read`.
 *
 * When the terminating (zero sized) chunk is met, `state.content_length` is
 * set to the decoded size and, if enabled and no `content-length` header was
 * seen, a synthetic `content-length` header is reported.
 *
 * `buffer` / `length` describe the buffer handed to `http1_parse`; `*start`
 * is advanced past everything that was consumed.
 *
 * Returns 0 on success or when more data is needed, -1 on error and -2 when
 * trailer headers may follow (the caller should parse headers again).
 */
inline static int http1_consume_body_chunked(http1_parser_s *parser,
                                             void *buffer, size_t length,
                                             uint8_t **start) {
  uint8_t *const stop = ((uint8_t *)buffer) + length;
  uint8_t *end = *start;
  while (*start < stop) {
    if (parser->state.content_length == 0) {
      /* expecting "[CRLF]<hex size>CRLF" */
      if (end + 2 >= stop)
        return 0;
      if ((end[0] == '\r' && end[1] == '\n')) {
        /* remove trailing EOL that wasn't processed and retest */
        end += 2;
        *start = end;
        if (end + 2 >= stop)
          return 0;
      }
      long long chunk_len = http1_atol16(end, (const uint8_t **)&end);
      if (end + 2 > stop) /* overflowed? */
        return 0;
      if ((end[0] != '\r' || end[1] != '\n'))
        return -1; /* required EOL after content length */
      if (chunk_len < 0)
        return -1; /* signed or oversized chunk size: would move `*start`
                      backwards */
      end += 2;
      parser->state.content_length = 0 - chunk_len;
      *start = end;
      if (parser->state.content_length == 0) {
        /* all chunked data was parsed */
        /* update content-length */
        parser->state.content_length = parser->state.read;
#if HTTP_ADD_CONTENT_LENGTH_HEADER_IF_MISSING
        if (!(parser->state.reserved & HTTP1_P_FLAG_CLENGTH)) {
          /* add virtual header: the decoded size as decimal text */
          char buf[32];
          size_t buf_pos = sizeof(buf) - 1;
          size_t tmp_len = (size_t)parser->state.read;
          buf[buf_pos] = 0;
          do { /* do/while: an empty body is reported as "0" */
            buf[--buf_pos] = (char)('0' + (tmp_len % 10));
            tmp_len /= 10;
          } while (tmp_len);
          if (http1_on_header(parser, "content-length", 14, buf + buf_pos,
                              (sizeof(buf) - 1) - buf_pos)) {
            return -1;
          }
        }
#endif
        /* FIXME: consume trailing EOL */
        if (*start + 2 <= stop && (start[0][0] == '\r' || start[0][0] == '\n'))
          *start += 1 + (start[0][1] == '\r' || start[0][1] == '\n');
        else {
          /* remove the "headers complete" and "trailer" flags */
          parser->state.reserved =
              HTTP1_P_FLAG_STATUS_LINE | HTTP1_P_FLAG_CLENGTH;
          return -2;
        }
        /* the parsing complete flag */
        parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
        return 0;
      }
    }
    /* Inside a chunk: report as much of it as the buffer holds. Sizes are
     * clamped before a pointer is formed, since the chunk size comes from the
     * peer and may be far larger than the buffer. */
    {
      const unsigned long long owed =
          (unsigned long long)(0 - parser->state.content_length);
      const size_t available = (size_t)(stop - *start);
      const size_t take = (owed < available) ? (size_t)owed : available;
      end = *start + take;
      if (take && http1_on_body_chunk(parser, (char *)(*start), take)) {
        return -1;
      }
      parser->state.read += (ssize_t)take;
      parser->state.content_length += (long long)take;
      *start = end;
    }
  }
  return 0;
}
/**
 * Body stage dispatcher.
 *
 * - a positive `content_length` that wasn't fully read yet: the body is
 *   streamed as is;
 * - a non-positive `content_length` with the CHUNKED flag set: the body is
 *   decoded as chunked data;
 * - otherwise there's nothing (more) to read and the message is complete.
 *
 * Returns whatever the selected body handler returns, or 0 when the message
 * was simply marked as complete.
 */
inline static int http1_consume_body(http1_parser_s *parser, void *buffer,
                                     size_t length, uint8_t **start) {
  const long long expected = parser->state.content_length;
  /* normal, streamed data */
  if (expected > 0 && expected > parser->state.read)
    return http1_consume_body_streamed(parser, buffer, length, start);
  /* chunked encoding */
  if (expected <= 0 && (parser->state.reserved & HTTP1_P_FLAG_CHUNKED))
    return http1_consume_body_chunked(parser, buffer, length, start);
  /* nothing to do - parsing complete */
  parser->state.reserved |= HTTP1_P_FLAG_COMPLETE;
  return 0;
}
/* *****************************************************************************
HTTP/1.1 parser function
***************************************************************************** */
/*
* Debug-only assertion hook: maps to `assert` when DEBUG is non-zero and
* expands to nothing otherwise (in which case its arguments aren't evaluated).
*/
#if DEBUG
#include <assert.h>
#define HTTP1_ASSERT assert
#else
#define HTTP1_ASSERT(...)
#endif
/**
* Returns the amount of data actually consumed by the parser.
*
* The value 0 indicates there wasn't enough data to be parsed and the same
* buffer (with more data) should be resubmitted.
*
* A value smaller than the buffer size indicates that EITHER a request /
* response was detected OR that the leftover could not be consumed because more
* data was required.
*
* Simply resubmit the remainder of the data to continue parsing.
*
* A request / response callback automatically stops the parsing process,
* allowing the user to adjust or refresh the state of the data.
*/
/*
 * Implementation notes.
 *
 * The lowest three bits of `state.reserved` select the stage:
 *   0 - expecting the request / response line;
 *   1 - parsing headers (also used for trailers after a chunked body);
 *   3 - consuming the body.
 * Every stage falls through to the next one as long as data is available.
 * When data runs out in the middle of a line, the bytes consumed so far are
 * reported and the unfinished line is left for the next call.
 *
 * Once the message is complete, `state.next` is set to the first unread byte,
 * the on_request / on_response callback runs and the state is reset.
 *
 * On any error `http1_on_error` is called, the state is reset and `length`
 * is returned.
 */
static size_t http1_parse(http1_parser_s *parser, void *buffer, size_t length) {
  if (!length)
    return 0;
  HTTP1_ASSERT(parser && buffer);
  parser->state.next = NULL;
  uint8_t *start = (uint8_t *)buffer;
  uint8_t *end = start;
  uint8_t *const stop = start + length;
  uint8_t eol_len = 0;
#define HTTP1_CONSUMED ((size_t)((uintptr_t)start - (uintptr_t)buffer))
re_eval:
  switch ((parser->state.reserved & 7)) {
  case 0: /* request / response line */
    /* clear out any leading white space */
    while ((start < stop) &&
           (*start == '\r' || *start == '\n' || *start == ' ' || *start == 0)) {
      ++start;
    }
    end = start;
    /* make sure the whole line is available*/
    if (!(eol_len = seek2eol(&end, stop)))
      return HTTP1_CONSUMED;
    /* `end` points at the '\n'; `end - eol_len + 1` is the first EOL byte */
    if (start[0] == 'H' && start[1] == 'T' && start[2] == 'T' &&
        start[3] == 'P') {
      /* HTTP response */
      if (http1_consume_response_line(parser, start, end - eol_len + 1))
        goto error;
    } else if (http_tolower(start[0]) >= 'a' && http_tolower(start[0]) <= 'z') {
      /* HTTP request */
      if (http1_consume_request_line(parser, start, end - eol_len + 1))
        goto error;
    } else
      goto error;
    end = start = end + 1;
    parser->state.reserved |= HTTP1_P_FLAG_STATUS_LINE;
    /* fallthrough */
  case 1: /* headers */
    do {
      if (start >= stop)
        return HTTP1_CONSUMED; /* buffer ended on header line */
      if (*start == '\r' || *start == '\n') {
        goto finished_headers; /* empty line, end of headers */
      }
      end = start;
      if (!(eol_len = seek2eol(&end, stop)))
        return HTTP1_CONSUMED;
      if (http1_consume_header(parser, start, end - eol_len + 1))
        goto error;
      end = start = end + 1;
    } while ((parser->state.reserved & HTTP1_P_FLAG_HEADER_COMPLETE) == 0);
  finished_headers:
    /* consume the empty line: either "\r\n" or a lone "\n" */
    if (*start == '\r') {
      if (start + 1 >= stop)
        return HTTP1_CONSUMED; /* the '\n' didn't arrive yet - wait for it
                                  rather than reading past the buffer */
      ++start;
      if (*start == '\n')
        ++start;
    } else {
      ++start; /* a lone '\n'; what follows belongs to the body */
    }
    end = start;
    parser->state.reserved |= HTTP1_P_FLAG_HEADER_COMPLETE;
    /* fallthrough */
  case (HTTP1_P_FLAG_HEADER_COMPLETE | HTTP1_P_FLAG_STATUS_LINE):
    /* request body */
    {
      int t3 = http1_consume_body(parser, buffer, length, &start);
      switch (t3) {
      case -1:
        goto error;
      case -2:
        goto re_eval; /* chunked body ended, trailers may follow */
      }
      break;
    }
  }
  /* are we done ? */
  if (parser->state.reserved & HTTP1_P_FLAG_COMPLETE) {
    parser->state.next = start;
    if (((parser->state.reserved & HTTP1_P_FLAG_RESPONSE)
             ? http1_on_response
             : http1_on_request)(parser))
      goto error;
    parser->state = (struct http1_parser_protected_read_only_state_s){0};
  }
  return HTTP1_CONSUMED;
error:
  http1_on_error(parser);
  parser->state = (struct http1_parser_protected_read_only_state_s){0};
  return length;
#undef HTTP1_CONSUMED
}
#endif