-
Notifications
You must be signed in to change notification settings - Fork 0
/
linker.c
1951 lines (1505 loc) · 81 KB
/
linker.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#define _CRT_SECURE_NO_WARNINGS
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <mbstring.h>
// Short fixed-width integer aliases used throughout this file:
// 'u' = unsigned, 's' = signed, followed by the width in bits.
// NOTE(review): '__int8' .. '__int64' are compiler-specific keywords rather than ISO C,
// so this presumably only builds with MSVC-compatible compilers - confirm before porting.
typedef unsigned __int8 u8;
typedef unsigned __int16 u16;
typedef unsigned __int32 u32;
typedef unsigned __int64 u64;
typedef __int8 s8;
typedef __int16 s16;
typedef __int32 s32;
typedef __int64 s64;
// Byte-size helpers. The '1024ULL' factor makes every result 'unsigned long long',
// so values such as giga_bytes(64) do not overflow 'int' arithmetic.
#define kilo_bytes(a) ((a) * 1024ULL)
#define mega_bytes(a) ((kilo_bytes(a)) * 1024ULL)
#define giga_bytes(a) ((mega_bytes(a)) * 1024ULL)
// Number of elements of a real array. Must not be used on a pointer, because
// 'sizeof' would then measure the pointer instead of the array.
#define array_count(a) (sizeof(a)/sizeof(*(a)))
// Byte offset of 'member' inside 'type'. The expansion is fully parenthesized so it
// can be used inside larger expressions.
#define offset_in_type(type, member) ((u64)(&((type *)0)->member))
// printf-style output to stdout that is flushed right away.
// Returns the value vprintf produced for the formatted text.
int print(char *format, ...){
    va_list arguments;
    va_start(arguments, format);
    int characters_written = vprintf(format, arguments);
    va_end(arguments);

    // A null stream argument makes fflush flush every open output stream.
    fflush(NULL);
    return characters_written;
}
#include "memory_arena.c"
// A blob of file contents: 'size' bytes starting at 'memory'.
// A null 'memory' is what callers in this file test for to detect "no file".
struct file{
u8 *memory;
size_t size;
};
struct file load_file(char *file_name){
struct file file = {0};
FILE *handle = fopen(file_name, "rb");
if(!handle) return file;
fseek(handle, 0, SEEK_END);
size_t size = _ftelli64(handle);
if(size == -1) return file;
fseek(handle, 0, SEEK_SET);
u8 *memory = malloc(size);
if(!memory) return file;
fread(memory, 1, size, handle);
fclose(handle);
return (struct file){ .memory = memory, .size = size };
}
// Hashes the first 'name_length' bytes of 'name':
// start at 5381 and fold in every byte as 'hash = hash * 33 + byte'.
u64 dbj2(char *name, size_t name_length){
    u64 hash = 5381;
    u32 index = 0;
    while(index < name_length){
        // 'hash * 33' is the same value as '(hash << 5) + hash'.
        hash = hash * 33 + name[index];
        index += 1;
    }
    return hash;
}
//_____________________________________________________________________________________________________________________
// Streams
// A read cursor over a memory buffer:
// 'data' points at 'size' bytes, 'offset' is the position of the next sequential read.
// The stream_* functions below never copy or own 'data'.
struct stream{
u8 *data;
u64 size;
u64 offset;
};
// Copies the next 'size' bytes of 'stream' into 'data' and advances the read offset.
//
// Returns 0 on success. If fewer than 'size' bytes remain, nothing is copied, the
// offset is moved to the end of the stream and 1 is returned.
int stream_read(struct stream *stream, void *data, size_t size){
    // Compare against the remaining byte count instead of computing 'offset + size',
    // which could wrap around for huge sizes and wrongly pass the bounds check.
    if(stream->offset > stream->size || size > stream->size - stream->offset){
        stream->offset = stream->size;
        return 1;
    }

    memcpy(data, stream->data + stream->offset, size);
    stream->offset += size;
    return 0;
}
// Copies the next 'size' bytes of 'stream' into 'data' without moving the read offset.
//
// Returns 0 on success and 1 if fewer than 'size' bytes remain (nothing is copied and,
// unlike stream_read, the offset is left untouched).
int stream_peek(struct stream *stream, void *data, size_t size){
    // Compare against the remaining byte count; 'offset + size' could wrap around.
    if(stream->offset > stream->size || size > stream->size - stream->offset){
        return 1;
    }

    memcpy(data, stream->data + stream->offset, size);
    return 0;
}
// Advances the read offset of 'stream' by 'size' bytes.
//
// Returns 0 on success. If fewer than 'size' bytes remain, the offset is moved to the
// end of the stream and 1 is returned.
int stream_skip(struct stream *stream, size_t size){
    // Compare against the remaining byte count; 'offset + size' could wrap around.
    if(stream->offset > stream->size || size > stream->size - stream->offset){
        stream->offset = stream->size;
        return 1;
    }

    stream->offset += size;
    return 0;
}
// Returns a pointer to the next 'count' elements of 'member_size' bytes each inside the
// stream's own buffer (no copy is made) and advances the read offset past them.
//
// If the array does not fit into the remaining bytes, the offset is moved to the end
// of the stream and a null pointer is returned.
void *stream_read_array_by_pointer(struct stream *stream, u64 member_size, u64 count){
    int out_of_bounds = stream->offset > stream->size;

    // Division-based check: 'member_size * count' is only computed once it is known to
    // fit into the remaining bytes, so neither the product nor 'offset + size' can wrap.
    if(!out_of_bounds && member_size != 0){
        out_of_bounds = count > (stream->size - stream->offset) / member_size;
    }

    if(out_of_bounds){
        stream->offset = stream->size;
        return 0;
    }

    void *ret = stream->data + stream->offset;
    stream->offset += member_size * count;
    return ret;
}
// Returns a pointer to the byte range that starts at absolute position 'start' and
// spans 'count' elements of 'size' bytes each, or a null pointer if that range is not
// completely inside the stream. The read offset of 'stream' is neither used nor changed.
void *stream_read_range_by_pointer(struct stream *stream, u64 start, u64 size, u64 count){
    if(start > stream->size) return 0;

    // Division-based check so that neither 'size * count' nor 'start + size * count'
    // can wrap around and slip past the bounds check.
    if(size != 0 && count > (stream->size - start) / size) return 0;

    return stream->data + start;
}
//_____________________________________________________________________________________________________________________
// Strings
// A counted string: 'size' bytes starting at 'data'.
// The functions below never rely on a zero terminator being present.
struct string{
char *data;
size_t size;
};
// Builds a 'struct string' from a string literal (or char array). The length comes from
// 'sizeof(a)-1', so passing a 'char *' would yield the pointer size minus one instead.
#define string(a) (struct string){ .data = (a), .size = sizeof(a)-1 }
// Returns 1 if 'a' and 'b' have the same length and strncmp reports their first
// 'a.size' characters as equal, otherwise 0.
int string_match(struct string a, struct string b){
    return a.size == b.size && strncmp(a.data, b.data, a.size) == 0;
}
// Returns 'string' without its trailing and leading ' ' characters.
// Only the space character is stripped; the underlying bytes are not modified,
// the result is a narrower view into the same memory.
struct string string_strip_whitespace(struct string string){
    char *first = string.data;
    size_t remaining = string.size;

    // Drop spaces at the end first ...
    for(; remaining && first[remaining - 1] == ' '; remaining -= 1){
    }

    // ... then at the front.
    for(; remaining && first[0] == ' '; first += 1, remaining -= 1){
    }

    string.data = first;
    string.size = remaining;
    return string;
}
//_____________________________________________________________________________________________________________________
// Object files
// Header at the very start of an object file, read verbatim by parse_object_file.
// parse_object_file consults 'machine' (must be 0x8664), 'number_of_sections',
// 'pointer_to_symbol_table', 'number_of_symbols' and 'size_of_optional_header'.
struct coff_file_header{
u16 machine;
u16 number_of_sections;
u32 timestamp;
// Absolute file position of the symbol table; the string table follows right behind it.
u32 pointer_to_symbol_table;
u32 number_of_symbols;
// Number of bytes between this header and the first section header.
u16 size_of_optional_header;
u16 file_characteristics;
};
// NOTE(review): not referenced in the visible part of this file. The field names suggest
// this mirrors the 64-bit (PE32+) optional header of an executable image - confirm against
// the code that writes the image and against the PE format documentation.
struct image_optional_header64{
u16 magic;
u8 major_linker_version;
u8 minor_linker_version;
u32 size_of_code;
u32 size_of_initialized_data;
u32 size_of_uninitialized_data;
u32 address_of_entry_point;
u32 base_of_code;
u64 image_base;
u32 section_alignment;
u32 file_alignment;
u16 major_operating_system_version;
u16 minor_operating_system_version;
u16 major_image_version;
u16 minor_image_version;
u16 major_subsystem_version;
u16 minor_subsystem_version;
u32 win32_version_value;
u32 size_of_image;
u32 size_of_headers;
u32 checksum;
u16 subsystem;
u16 dll_characteristics;
u64 size_of_stack_reserve;
u64 size_of_stack_commit;
u64 size_of_heap_reserve;
u64 size_of_heap_commit;
u32 loader_flags;
u32 number_of_rva_and_sizes;
// Sixteen (address, size) pairs; which slot describes what is not visible here.
struct image_data_directory{
u32 virtual_address;
u32 size;
} data_directory[16];
};
// One entry of the section header array of an object file (see parse_object_file).
struct coff_section_header{
// Compared with strncmp(..., 8) in compare_sections_to_combine, so it is treated as
// possibly not zero-terminated.
char name[8];
u32 virtual_size;
u32 virtual_address;
// Size and absolute file position of the section contents.
u32 size_of_raw_data;
u32 pointer_to_raw_data;
// Absolute file position of 'number_of_relocations' 'struct coff_relocation' entries.
u32 pointer_to_relocations;
u32 pointer_to_line_numbers;
u16 number_of_relocations;
u16 number_of_line_numbers;
// Bit 0x00000080 marks uninitialized data; parse_object_file requires such sections
// to have neither raw data nor relocations.
u32 characteristics;
};
// Packed on-disk layouts of a relocation (10 bytes) and a symbol table entry (18 bytes).
// Both branches declare the same members; they differ only in how packing is requested.
// The two size assertions at the end guard the packing.
#ifdef __HLC__
// This is for my compiler... I am leaving this here for now.
struct __declspec(packed) coff_relocation {
u32 relocation_address;
u32 symbol_table_index;
u16 relocation_type;
};
struct __declspec(packed) coff_symbol {
union{
char short_name[8];
struct{
u32 zeroes;
u32 offset;
} long_name;
};
u32 value;
u16 section_number;
u16 symbol_type;
u8 storage_class;
u8 number_of_auxiliary_symbol_records;
};
#else
// Replacement for the '_Static_assert' keyword: a false condition produces an array
// of size -1, which fails to compile. The message argument 'b' is ignored.
#define _Static_assert(a, b) typedef int static_assert_declaration[(a) ? 1 : -1];
#pragma pack(push, 1)
struct coff_relocation {
u32 relocation_address;
u32 symbol_table_index;
u16 relocation_type;
};
struct coff_symbol {
// If 'long_name.zeroes' is 0 the name is stored in the string table at byte offset
// 'long_name.offset'; otherwise 'short_name' holds the name, which is not
// zero-terminated when it uses all 8 bytes (see get_symbol_name).
union{
char short_name[8];
struct{
u32 zeroes;
u32 offset;
} long_name;
};
u32 value;
// 0 is treated as "not defined in this object file" by the symbol collection in main.
// NOTE(review): declared unsigned, so the negative special section numbers of the
// format show up as large positive values here - confirm the callers expect that.
u16 section_number;
u16 symbol_type;
u8 storage_class;
// Number of symbol table entries following this one that belong to it; loops over
// the symbol table skip that many entries.
u8 number_of_auxiliary_symbol_records;
};
#pragma pack(pop)
#endif
_Static_assert(sizeof(struct coff_relocation) == 10, "coff relocation size incorrect.");
_Static_assert(sizeof(struct coff_symbol) == 18, "coff symbol size incorrect.");
// A loaded object file together with the tables located by parse_object_file.
// 'file_name' and 'stream' are filled in by the caller before parsing; the remaining
// members are written by parse_object_file on success and point into 'stream.data'.
struct object_file{
char *file_name;
struct stream stream;
u16 number_of_sections;
u32 number_of_symbols;
// Size in bytes of the string table, including its leading 4-byte size field.
u32 string_table_size;
struct coff_section_header *section_headers;
struct coff_symbol *symbol_table;
// Starts at the 4-byte size field; its last byte is verified to be 0.
u8 *string_table;
};
// Validates the object file in 'object_file->stream' and fills in the parsed members.
//
// Checks performed:
//  - the file header is present and its machine type is 0x8664,
//  - the symbol table and the section headers are in bounds,
//  - for every section the raw data and the relocations are in bounds
//    (uninitialized-data sections must have neither),
//  - the string table right behind the symbol table is in bounds and zero-terminated.
//
// Returns 1 on success and 0 on failure; on failure 'object_file' is not modified.
// All stored pointers point into the stream's data, nothing is copied.
int parse_object_file(struct object_file *object_file){
    // Work on a copy so the caller's stream offset stays where it was.
    struct stream stream = object_file->stream;

    struct coff_file_header file_header;
    if(stream_read(&stream, &file_header, sizeof(file_header))) return 0;
    if(file_header.machine != 0x8664) return 0;

    //
    // Get the symbol table.
    //
    struct coff_symbol *symbol_table = stream_read_range_by_pointer(&stream, file_header.pointer_to_symbol_table, sizeof(struct coff_symbol), file_header.number_of_symbols);
    if(!symbol_table) return 0;

    //
    // The section headers start behind the optional header.
    //
    if(stream_skip(&stream, file_header.size_of_optional_header)) return 0;

    struct coff_section_header *section_headers = stream_read_array_by_pointer(&stream, sizeof(*section_headers), file_header.number_of_sections);
    if(!section_headers) return 0; // Out-of-bounds section headers are a parse failure.

    for(u32 index = 0; index < file_header.number_of_sections; index++){
        struct coff_section_header *section_header = &section_headers[index];

        //
        // Check that the contents are in bounds.
        //
        if(section_header->characteristics & /*UNINITIALIZED_DATA*/0x00000080){
            if(section_header->number_of_relocations) return 0; // @paranoid
            if(section_header->pointer_to_raw_data) return 0;
            continue;
        }

        if(!stream_read_range_by_pointer(&stream, section_header->pointer_to_raw_data, 1, section_header->size_of_raw_data)) return 0;

        // @cleanup: Do the relocation overflow thing.
        if(!stream_read_range_by_pointer(&stream, section_header->pointer_to_relocations, sizeof(struct coff_relocation), section_header->number_of_relocations)) return 0;
    }

    //
    // The string table starts right behind the symbol table (validated to be in bounds above).
    //
    stream.offset = file_header.pointer_to_symbol_table + sizeof(struct coff_symbol) * file_header.number_of_symbols;

    u32 string_table_size; // @note: Contains this 'string_table_size' field.
    if(stream_peek(&stream, &string_table_size, sizeof(string_table_size))) return 0;
    if(string_table_size < 4) return 0;

    // The final zero byte guarantees that 'strlen' on any in-bounds offset terminates.
    u8 *string_table = stream_read_array_by_pointer(&stream, 1, string_table_size);
    if(!string_table || string_table[string_table_size-1] != 0) return 0;

    object_file->number_of_symbols = file_header.number_of_symbols;
    object_file->symbol_table = symbol_table;
    object_file->number_of_sections = file_header.number_of_sections;
    object_file->section_headers = section_headers;
    object_file->string_table_size = string_table_size;
    object_file->string_table = string_table;
    return 1;
}
//_____________________________________________________________________________________________________________________
// Ar files.
// A loaded archive (.lib) together with the symbol index located by parse_ar_file.
// 'file_name' and 'stream' are filled in by the caller before parsing; the remaining
// members are written by parse_ar_file on success.
struct ar_file{
char *file_name;
struct stream stream;
u32 amount_of_members;
u32 amount_of_symbols;
// 'amount_of_members' file offsets of archive member headers; points into 'stream.data'.
u32 *member_offsets;
// For every symbol the one-based index into 'member_offsets'; points into 'stream.data'.
u16 *symbol_member_indices;
// 'amount_of_symbols' pointers to zero-terminated names. The pointer array lives in the
// memory arena, the names themselves inside 'stream.data'. ar_lookup_symbol runs a binary
// search over it, which requires the names to be sorted the way strcmp orders them.
char **symbol_string_table;
};
// Fixed-size, all-text header in front of every archive member.
// parse_ar_file and ar_lookup_symbol only interpret 'file_identifier' and
// 'file_size_in_bytes'; both strip the ' ' padding before using a field.
struct ar_file_header{
//
// An ASCII file-identifier.
//
u8 file_identifier[16];
//
// The modification time in seconds, as ASCII-decimal.
//
u8 file_modification_timestamp[12];
//
// Owner and group ID as ASCII-decimal.
//
u8 owner_identifier[6];
u8 group_identifier[6];
//
// The file type and permissions as ASCII-octal.
//
u8 file_mode[8];
//
// The size of the file in bytes as ASCII-decimal.
//
u8 file_size_in_bytes[10];
//
// The two characters "`\n" that end the header.
//
u8 ending_characters[2];
};
// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#archive-library-file-format
//
// The ar (or archiver) file-format is used by .lib-files
// and combines multiple .obj-files into one .lib-file.
//
// The file begins with the signature '!<arch>\n'.
// After this the file is a sequence of file sections.
// Each file section starts with a header which specifies the size
// and name of the file section.
// The header is followed by the data of the file-section.
//
// The first two or three file-sections are special.
// The first and second one are a symbol index and have the name '/'.
// The first one in big-endian, the second one in little-endian.
// The third section is optionally '//', the long name data.
//
// Parses the archive (.lib) in 'ar_file->stream' far enough to look symbols up later.
//
// Only the Microsoft-specific second linker member (the little-endian symbol index) is
// used. On success 'amount_of_members', 'amount_of_symbols', 'member_offsets',
// 'symbol_member_indices' and 'symbol_string_table' of 'ar_file' are filled in.
// 'member_offsets' and 'symbol_member_indices' point into the stream's data; the
// 'symbol_string_table' pointer array is pushed onto 'arena' and its entries point into
// the stream's data.
//
// Returns 1 on success and 0 on failure (an error is printed when the second linker
// member is missing).
int parse_ar_file(struct memory_arena *arena, struct ar_file *ar_file){
    // Work on a copy so the caller's stream offset stays where it was.
    struct stream stream = ar_file->stream;
    char *file_name = ar_file->file_name;

    char signature[8];
    if(stream_read(&stream, signature, sizeof(signature))) return 0;
    if(memcmp(signature, "!<arch>\n", 8) != 0) return 0;

    struct stream little_endian_symbol_index = {0};

    struct ar_file_header ar_file_header;
    for(u32 file_header_index = 0; !stream_read(&stream, &ar_file_header, sizeof(ar_file_header)); file_header_index++){
        struct string file_identifier = string_strip_whitespace((struct string){.data = (char *)ar_file_header.file_identifier, .size = sizeof(ar_file_header.file_size_in_bytes) ? sizeof(ar_file_header.file_identifier) : 0});
        struct string file_size_string = string_strip_whitespace((struct string){.data = (char *)ar_file_header.file_size_in_bytes, .size = sizeof(ar_file_header.file_size_in_bytes)});

        // The size field is not zero-terminated; copy it into a terminated buffer for
        // strtoull instead of writing a terminator behind the field.
        char file_size_text[sizeof(ar_file_header.file_size_in_bytes) + 1];
        memcpy(file_size_text, file_size_string.data, file_size_string.size);
        file_size_text[file_size_string.size] = 0;

        u64 file_size = strtoull(file_size_text, NULL, 10);
        if(file_size == (u64)-1) return 0;

        u8 *file_data = stream_read_array_by_pointer(&stream, 1, file_size);

        // Each data section has two-byte alignment.
        if(stream_skip(&stream, (file_size & 1)) || !file_data) return 0;

        if(file_header_index == 0 && string_match(file_identifier, string("/"))){
            // The first file section should be the first linker member.
            // We only use the Microsoft-specific second linker member.
            continue;
        }

        if(file_header_index == 1 && string_match(file_identifier, string("/"))){
            // The second file section should be the second linker member.
            // This is a Microsoft-specific thing.
            little_endian_symbol_index = (struct stream){.data = file_data, .size = file_size};
            continue;
        }

        if(string_match(file_identifier, string("//"))){
            // Optionally, the long name data member should be immediately after the headers.
            // This is only used by the first file header to store longer strings.
        }

        // Everything from here on is a regular member; those are located through the symbol index.
        break;
    }

    if(!little_endian_symbol_index.data){
        print("Error: Failed to parse library '%s', currently only Windows-style import libraries are supported.\n", file_name);
        return 0;
    }

    //
    // The second linker member, or Microsoft specific symbol index, has the following layout:
    //
    //     u32 amount_of_members;
    //     u32 member_offsets[amount_of_members];
    //     u32 amount_of_symbols;
    //     u16 symbol_member_indices[amount_of_symbols];
    //     char string_table[]; // 'amount_of_symbols' many zero-terminated strings.
    //
    // The algorithm goes as follows:
    //
    //     u32 symbol_index  = binary_search(string_table, <identifier>);
    //     u16 member_index  = symbol_member_indices[symbol_index];
    //     u32 member_offset = member_offsets[member_index - 1];
    //
    //     struct ar_file_header *file_header = (void *)(file.data + member_offset);
    //     <parse the .obj or import-header>
    //

    // An empty member has no last byte to inspect (and cannot hold the counts either).
    if(little_endian_symbol_index.size == 0) return 0;

    // Ensure the string table is zero-terminated, so the 'strlen' walk below stays in bounds.
    if(little_endian_symbol_index.data[little_endian_symbol_index.size-1] != 0) return 0;

    u32 amount_of_members;
    if(stream_read(&little_endian_symbol_index, &amount_of_members, sizeof(amount_of_members))) return 0;

    u32 *member_offsets = stream_read_array_by_pointer(&little_endian_symbol_index, sizeof(u32), amount_of_members);
    if(!member_offsets) return 0;

    u32 amount_of_symbols;
    if(stream_read(&little_endian_symbol_index, &amount_of_symbols, sizeof(amount_of_symbols))) return 0;

    u16 *symbol_member_indices = stream_read_array_by_pointer(&little_endian_symbol_index, sizeof(u16), amount_of_symbols);
    if(!symbol_member_indices) return 0;

    // Whatever is left of the member is the string table.
    char *string_buffer = (char *)(little_endian_symbol_index.data + little_endian_symbol_index.offset);
    char *string_buffer_end = (char *)(little_endian_symbol_index.data + little_endian_symbol_index.size);

    // Collect one pointer per string. The zero-sized push marks the start of the array;
    // NOTE(review): this assumes consecutive pushes are contiguous in the arena - confirm in memory_arena.c.
    char **import_symbol_string_table = push_array(arena, char *, 0);
    u64 amount_of_strings = 0;
    for(char *it = string_buffer; it < string_buffer_end; it += strlen(it) + 1){
        *push_struct(arena, char *) = it;
        amount_of_strings++;
    }

    if(amount_of_strings != amount_of_symbols) return 0;

    ar_file->amount_of_members = amount_of_members;
    ar_file->amount_of_symbols = amount_of_symbols;
    ar_file->member_offsets = member_offsets;
    ar_file->symbol_member_indices = symbol_member_indices;
    ar_file->symbol_string_table = import_symbol_string_table;
    return 1;
}
// Head and tail pointers of a list of 'struct dll_import_node' (linked through 'next').
// NOTE(review): 'size' presumably tracks something about the list (node count?); the code
// that maintains it is outside the visible part of this file - confirm there.
struct dll_list{
struct dll_import_node *first;
struct dll_import_node *last;
u64 size;
};
// One node of a 'struct dll_list'.
// NOTE(review): judging by the member names this describes one imported DLL and its
// import tables; the code that fills these members is outside the visible part of this
// file, so their exact meaning should be confirmed there.
struct dll_import_node{
struct dll_import_node *next;
char *name;
u64 dllimport_index;
u64 *import_address_table;
u64 *import_lookup_table;
u32 import_address_table_relative_virtual_address;
};
// Comparison callback for arrays of C strings (used with bsearch):
// 'a' and 'b' each point at a 'char *' element, and the strings those elements refer
// to are ordered with strcmp.
int strcmp_wrapper(const void *a, const void *b){
    // Keep the pointed-to elements const; the callback must not modify them.
    const char *const *lhs = a;
    const char *const *rhs = b;
    return strcmp(*lhs, *rhs);
}
// Looks up 'symbol_name' (zero-terminated) in the symbol index of 'ar_file' and returns
// the contents of the archive member that provides it.
//
// The returned 'memory' points into the archive's own data (nothing is copied and the
// archive is not modified). A zeroed 'struct file' is returned if the symbol is not in
// the index or if the index or the member header is malformed.
//
// The algorithm goes as follows:
//
//     u32 symbol_index  = binary_search(string_table, <identifier>);
//     u16 member_index  = symbol_member_indices[symbol_index];
//     u32 member_offset = member_offsets[member_index - 1];
//
//     struct ar_file_header *file_header = (void *)(file.data + member_offset);
//     <parse the .obj or import-header>
struct file ar_lookup_symbol(struct ar_file *ar_file, char *symbol_name){
    struct file zero_file = {0};

    char **found = bsearch(&symbol_name, ar_file->symbol_string_table, ar_file->amount_of_symbols, sizeof(*ar_file->symbol_string_table), strcmp_wrapper);
    if(!found) return zero_file;

    u32 symbol_index = (u32)(found - ar_file->symbol_string_table);
    u16 member_index = ar_file->symbol_member_indices[symbol_index];

    // Member indices are one-based: valid values are 1 .. amount_of_members inclusive.
    if(member_index == 0 || member_index > ar_file->amount_of_members) return zero_file;
    u32 member_offset = ar_file->member_offsets[member_index-1];

    struct ar_file_header *ar_file_header = stream_read_range_by_pointer(&ar_file->stream, member_offset, sizeof(struct ar_file_header), 1);
    if(!ar_file_header) return zero_file;

    struct string file_size_string = string_strip_whitespace((struct string){.data = (char *)ar_file_header->file_size_in_bytes, .size = sizeof(ar_file_header->file_size_in_bytes)});

    // The size field is not zero-terminated. Copy it into a terminated buffer for
    // strtoull so the archive data itself is never written to.
    char file_size_text[sizeof(ar_file_header->file_size_in_bytes) + 1];
    memcpy(file_size_text, file_size_string.data, file_size_string.size);
    file_size_text[file_size_string.size] = 0;

    u64 file_size = strtoull(file_size_text, NULL, 10);
    if(file_size == (u64)-1) return zero_file;

    // The member contents start right behind the member header.
    u8 *file_data = stream_read_range_by_pointer(&ar_file->stream, member_offset + sizeof(*ar_file_header), 1, file_size);
    if(!file_data) return zero_file;

    return (struct file){.memory = file_data, .size = file_size};
}
//_____________________________________________________________________________________________________________________
// PDB includes
#include "msf.c"
#include "write_pdb.c"
//_____________________________________________________________________________________________________________________
// Helper functions
// Returns the name of 'symbol' as a counted string pointing into existing memory.
//
// Short names are stored inline in the symbol (at most 8 characters, not necessarily
// zero-terminated). Long names live in the string table of 'object_file'; if the stored
// offset is outside the string table an error is printed and a zeroed string is returned.
struct string get_symbol_name(struct coff_symbol *symbol, struct object_file *object_file){
    if(symbol->long_name.zeroes != 0){
        // Inline name.
        char *short_name = symbol->short_name;
        return (struct string){.data = short_name, .size = strnlen(short_name, sizeof(symbol->short_name)) };
    }

    size_t table_size = object_file->string_table_size;
    if(symbol->long_name.offset >= table_size){
        print("Error: Object file '%s' has corrupt symbol table.\n", object_file->file_name);
        return (struct string){0};
    }

    // Name in the string table; it runs up to the next zero byte.
    char *long_name = (char *)object_file->string_table + symbol->long_name.offset;
    return (struct string){.data = long_name, .size = strlen(long_name) };
}
// Sort record used by compare_sections_to_combine: a section header together with the
// object file it was taken from.
// NOTE(review): 'image_section_header' is not touched in the visible part of this file;
// presumably it refers to the output-image section this input section ends up in - confirm.
struct section_information{
struct coff_section_header *section_header;
struct object_file *object_file;
struct coff_section_header *image_section_header;
};
// Ordering callback for 'struct section_information' records.
//
// Orders by section name first (all 8 name bytes, strncmp order), then by the address
// of the owning object file, then by the address of the section header.
// Returns a negative, zero or positive value like strcmp.
int compare_sections_to_combine(const void *a, const void *b){
    const struct section_information *lhs = a;
    const struct section_information *rhs = b;

    // Most importantly sort by name.
    int name_order = strncmp(lhs->section_header->name, rhs->section_header->name, sizeof(lhs->section_header->name));
    if(name_order) return name_order;

    // If the name is the same, prefer earlier object files.
    // Compare the pointers directly: narrowing a pointer difference to 'int' could
    // truncate it and report the wrong order.
    if(lhs->object_file != rhs->object_file){
        return (lhs->object_file < rhs->object_file) ? -1 : 1;
    }

    // Lastly, if they are also in the same object file, compare the section headers.
    if(lhs->section_header != rhs->section_header){
        return (lhs->section_header < rhs->section_header) ? -1 : 1;
    }

    return 0;
}
int main(int argc, char *argv[]){
if(argc < 2){
print("Usage: %s <obj-files...>\n", argv[0]);
return 0;
}
struct memory_arena arena = create_memory_arena(giga_bytes(64));
u32 amount_of_files = argc-1;
u32 amount_of_ar_files = 0;
u32 amount_of_object_files = 0;
struct object_file *object_files = push_array(&arena, struct object_file, amount_of_files);
struct ar_file *ar_files = push_array(&arena, struct ar_file, amount_of_files);
//
// If available, get the %LIB% environment variable.
//
char *LIBPATH = getenv("LIB");
//
// Parse all the object files.
//
for(u32 index = 0; index < amount_of_files; index++){
char *file_name = argv[index + 1];
char *extension = (char *)_mbsrchr((u8 *)file_name, '.');
int success = 0;
if(strcmp(extension, ".obj") == 0){
struct file file = load_file(file_name);
if(!file.memory){
print("Error: Failed to load '%s'.\n", file_name);
return 1;
}
struct object_file object_file = {
.file_name = file_name,
.stream = {
.data = file.memory,
.size = file.size,
.offset = 0,
},
};
success = parse_object_file(&object_file);
object_files[amount_of_object_files++] = object_file;
}else if(strcmp(extension, ".lib") == 0){
struct file file = load_file(file_name);
if(!file.memory && LIBPATH){
char *path = LIBPATH;
while(*path){
char *end = (char *)_mbschr((unsigned char *)path, ';');
char buffer[0x100];
if(end){
snprintf(buffer, sizeof(buffer), "%.*s/%s", (int)(end - path), path, file_name);
path = end + 1;
}else{
snprintf(buffer, sizeof(buffer), "%s/%s", path, file_name);
path = "";
}
file = load_file(buffer);
if(file.memory) break;
}
}
if(!file.memory){
print("Error: Could not find '%s'.\n", file_name);
return 1;
}
struct ar_file ar_file = {
.file_name = file_name,
.stream = {
.data = file.memory,
.size = file.size,
.offset = 0,
},
};
success = parse_ar_file(&arena, &ar_file);
ar_files[amount_of_ar_files++] = ar_file;
}
if(!success){
print("Error: Parsing object file '%s'.\n", file_name);
return 1;
}
}
//
// Figure out all external symbols and put them into a hash table for future use.
//
u64 external_symbols_capacity = 0x100;
u64 external_symbols_size = 0;
struct external_symbol{
struct string name;
u32 size;
u32 is_defined;
u32 object_file_index;
u32 offset;
u16 section_number;
u32 is_ImageBase;
u16 hint;
u32 is_dllimport;
u32 dllimport_index;
struct dll_import_node *dll;
u32 is_dllimport_thunk;
u32 dllimport_thunk_index;
} *external_symbols = push_array(&arena, struct external_symbol, external_symbols_capacity);
{
// Add `__ImageBase` to the external_symbols.
struct external_symbol *image_base_symbol = &external_symbols[dbj2("__ImageBase", sizeof("__ImageBase") - 1) & (external_symbols_capacity - 1)];
image_base_symbol->name = string("__ImageBase");
image_base_symbol->is_ImageBase = 1;
external_symbols_size += 1;
}
for(u32 object_file_index = 0; object_file_index < amount_of_object_files; object_file_index++){
struct object_file *object_file = &object_files[object_file_index];
struct coff_symbol *symbol_table = object_file->symbol_table;
u8 *string_table = object_file->string_table;
u32 string_table_size = object_file->string_table_size;
u32 number_of_symbols = object_file->number_of_symbols;
for(u32 symbol_index = 0; symbol_index < number_of_symbols; symbol_index++){
struct coff_symbol *symbol = symbol_table + symbol_index;
if((symbol->storage_class != /*IMAGE_SYM_CLASS_EXTERNAL*/2) || (symbol->section_number < /*IMAGE_SYM_ABSOLUTE, IMAGE_SYM_DEBUG*/0)){
symbol_index += symbol->number_of_auxiliary_symbol_records;
continue;
}
//
// Grow the 'external_symbols' table if needed.
//
if(2 *(external_symbols_size + 1) > external_symbols_capacity){
u64 new_capacity = 2 * external_symbols_capacity;
struct external_symbol *new_symbols = push_array(&arena, struct external_symbol, new_capacity);
for(u64 old_index = 0; old_index < external_symbols_capacity; old_index++){
u64 name_hash = dbj2(external_symbols[old_index].name.data, external_symbols[old_index].name.size);
for(u64 new_index = 0; new_index < new_capacity; new_index++){
u64 hash_index = (name_hash + new_index) & (new_capacity - 1);
if(!new_symbols[hash_index].name.data){
new_symbols[hash_index] = external_symbols[old_index];
break;
}
}
}
external_symbols_capacity = new_capacity;
external_symbols = new_symbols;
}
char *name;
size_t name_length;
if(symbol->long_name.zeroes == 0){
if(symbol->long_name.offset >= string_table_size){
print("Error: Object file '%s' has corrupt symbol table.\n", object_file->file_name);
return 1;
}
name = (char *)string_table + symbol->long_name.offset;
name_length = strlen(name);
}else{
name = symbol->short_name;
name_length = strnlen(name, sizeof(symbol->short_name));
}
if(symbol->section_number > object_file->number_of_sections){
print("Error: Object file '%s' has symbol '%.*s' with invalid section number.\n", object_file->file_name, name_length, name);
return 1;
}
u32 is_defined = symbol->section_number != 0;
u32 size = is_defined ? 0 : symbol->value;
u64 name_hash = dbj2(name, name_length);
for(u64 table_index = 0; table_index < external_symbols_capacity; table_index++){
u64 hash_index = (name_hash + table_index) & (external_symbols_capacity - 1);
struct external_symbol *external_symbol = &external_symbols[hash_index];
if(external_symbol->name.data == 0){
// Insert a new entry.
external_symbol->name.data = name;
external_symbol->name.size = name_length;
external_symbol->size = size;
external_symbol->is_defined = is_defined;
external_symbol->offset = symbol->value;
external_symbol->section_number = symbol->section_number;
external_symbol->object_file_index = object_file_index;
external_symbols_size += 1;
break;
}
if(external_symbol->name.size == name_length && strncmp(external_symbol->name.data, name, name_length) == 0){
// We found the entry in the table.
if(external_symbol->is_defined && is_defined){
print("Warning: External symbol '%.*s' is defined more than once.\n", name_length, name);
break; // Just take the first one! This is a hack to get around `__declspec(selectany)`.
}
if(size && external_symbol->size && size != external_symbol->size){
print("Error: External symbol '%.*s' is specified both with size 0x%x and 0x%x.\n", name_length, name, size, external_symbols->size);
return 1;
}
if(is_defined){
external_symbol->is_defined = 1;
external_symbol->offset = symbol->value;
external_symbol->object_file_index = object_file_index;
external_symbol->section_number = symbol->section_number;
}
if(size) external_symbol->size = size;
break;
}
}
symbol_index += symbol->number_of_auxiliary_symbol_records;
}
}
//
// Scan the external symbols and put them into a couple different buckets:
//
// 1. defined symbols (we could find an external symbol with object file, section, offset)
// 2. undefined but sized symbols, these go into the .bss section later
// 3. dll-imports
// 4. Undefined symbols (errors)
//
struct dll_list dlls = {0};
int reference_to_undefined_symbol = 0;
u64 bss_size = 0;
u64 size_of_name_hint_table = 0;
u32 dllimport_thunk_count = 0;
for(u32 table_index = 0; table_index < external_symbols_capacity; table_index++){
struct external_symbol *symbol = &external_symbols[table_index];
if(!symbol->name.data) continue;
// Defined symbols are fine.
if(symbol->is_defined) continue;
// Symbols which have a size are added to the .bss section.
if(symbol->size){
symbol->offset = bss_size;
bss_size += symbol->size;
if(bss_size > 0xffffffff){
print("Error: The combined size of all uninitialized variables exceeds 32-bit.\n");
return 1;
}
continue;
}
// Ignore the special __ImageBase symbol.
if(symbol->is_ImageBase) continue;
//
// We have found an unresolved symbol, look it up in the library files.
//
if(symbol->name.size == 8){
// Make sure the string is zero-terminated.
char *new_data = push_array(&arena, char, 9);
memcpy(new_data, symbol->name.data, 8);
new_data[8] = 0;
symbol->name.data = new_data;
}
struct file symbol_object_file = {0};
for(u32 ar_index = 0; ar_index < amount_of_ar_files; ar_index++){
struct ar_file *ar = &ar_files[ar_index];
symbol_object_file = ar_lookup_symbol(ar, symbol->name.data);
if(symbol_object_file.memory) break;
}
if(!symbol_object_file.memory){
print("Error: Symbol '%.*s' was used but never defined.\n", symbol->name.size, symbol->name.data);
reference_to_undefined_symbol = 1;
continue;
}
//
// @cleanup: Length checks.
//
u16 signature_1 = *(u16 *)symbol_object_file.memory;
u16 signature_2 = *(u16 *)(symbol_object_file.memory + 2);
if(signature_1 == 0 && signature_2 == 0xffff){
// This is an import header.
u8 *file_data = symbol_object_file.memory;
u64 file_size = symbol_object_file.size;
// Layout:
// ar_import_header
// identifier (symbol)
// dll_name
//
struct ar_import_header{
u16 signature_1;
u16 signature_2;