#ifndef spp_dlalloc__h_
#define spp_dlalloc__h_
/* This is a C++ allocator created from Doug Lea's dlmalloc
(Version 2.8.6 Wed Aug 29 06:57:58 2012)
see: http://g.oswego.edu/dl/html/malloc.html
*/
#include "spp_utils.h"
#include "spp_smartptr.h"
#ifndef SPP_FORCEINLINE
#if defined(__GNUC__)
#define SPP_FORCEINLINE __inline __attribute__ ((always_inline))
#elif defined(_MSC_VER)
#define SPP_FORCEINLINE __forceinline
#else
#define SPP_FORCEINLINE inline
#endif
#endif
#ifndef SPP_IMPL
#define SPP_IMPL SPP_FORCEINLINE
#endif
#ifndef SPP_API
#define SPP_API static
#endif
namespace spp
{
// ---------------------- allocator internal API -----------------------
typedef void* mspace;
/*
create_mspace creates and returns a new independent space with the
given initial capacity, or, if 0, the default granularity size. It
returns null if there is no system memory available to create the
space. If argument locked is non-zero, the space uses a separate
lock to control access. The capacity of the space will grow
dynamically as needed to service mspace_malloc requests. You can
control the sizes of incremental increases of this space by
compiling with a different SPP_DEFAULT_GRANULARITY or dynamically
setting with mallopt(M_GRANULARITY, value).
*/
SPP_API mspace create_mspace(size_t capacity, int locked);
SPP_API size_t destroy_mspace(mspace msp);
SPP_API void* mspace_malloc(mspace msp, size_t bytes);
SPP_API void mspace_free(mspace msp, void* mem);
SPP_API void* mspace_realloc(mspace msp, void* mem, size_t newsize);
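// A minimal usage sketch of the mspace API above (compiled out, illustrative
// only): each mspace is an independent heap, so allocations can be freed
// individually or the whole space can be released at once.
#if 0
void mspace_example()
{
    mspace sp = create_mspace(0, 0);                     // default capacity, no locking
    void *p = mspace_malloc(sp, 100 * sizeof(int));
    p = mspace_realloc(sp, p, 200 * sizeof(int));        // grows in place when possible
    mspace_free(sp, p);                                  // free one allocation...
    destroy_mspace(sp);                                  // ...or drop the whole heap
}
#endif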
#if 0
SPP_API mspace create_mspace_with_base(void* base, size_t capacity, int locked);
SPP_API int mspace_track_large_chunks(mspace msp, int enable);
SPP_API void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
SPP_API void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
SPP_API void** mspace_independent_calloc(mspace msp, size_t n_elements,
size_t elem_size, void* chunks[]);
SPP_API void** mspace_independent_comalloc(mspace msp, size_t n_elements,
size_t sizes[], void* chunks[]);
SPP_API size_t mspace_footprint(mspace msp);
SPP_API size_t mspace_max_footprint(mspace msp);
SPP_API size_t mspace_usable_size(const void* mem);
SPP_API int mspace_trim(mspace msp, size_t pad);
SPP_API int mspace_mallopt(int, int);
#endif
// -----------------------------------------------------------
// -----------------------------------------------------------
struct MSpace : public spp_rc
{
MSpace() :
_sp(create_mspace(0, 0))
{}
~MSpace()
{
destroy_mspace(_sp);
}
mspace _sp;
};
// -----------------------------------------------------------
// -----------------------------------------------------------
template<class T>
class spp_allocator
{
public:
typedef T value_type;
typedef T* pointer;
typedef ptrdiff_t difference_type;
typedef const T* const_pointer;
typedef size_t size_type;
MSpace *getSpace() const { return _space.get(); }
spp_allocator() : _space(new MSpace) {}
template<class U>
spp_allocator(const spp_allocator<U> &o) : _space(o.getSpace()) {}
template<class U>
spp_allocator& operator=(const spp_allocator<U> &o)
{
if ((const void *)&o != (const void *)this)
_space = o.getSpace();
return *this;
}
void swap(spp_allocator &o)
{
std::swap(_space, o._space);
}
pointer allocate(size_t n, const_pointer /* unused */ = 0)
{
pointer res = static_cast<pointer>(mspace_malloc(_space->_sp, n * sizeof(T)));
if (!res)
throw std::bad_alloc();
return res;
}
void deallocate(pointer p, size_t /* unused */)
{
mspace_free(_space->_sp, p);
}
pointer reallocate(pointer p, size_t new_size)
{
pointer res = static_cast<pointer>(mspace_realloc(_space->_sp, p, new_size * sizeof(T)));
if (!res)
throw std::bad_alloc();
return res;
}
size_type max_size() const
{
return static_cast<size_type>(-1) / sizeof(value_type);
}
void construct(pointer p, const value_type& val)
{
new (p) value_type(val);
}
void destroy(pointer p) { p->~value_type(); }
template<class U>
struct rebind
{
// rebound allocators share this allocator's mspace, so allocations made
// through a rebound copy (e.g. for internal <group_items>) live in the same
// space as the value_type allocations and are released together with them.
typedef spp::spp_allocator<U> other;
};
mspace space() const { return _space->_sp; }
// check if we can clear the whole allocator memory at once => works only if
// the allocator is not shared. If can_clear() returns true, we expect that
// the next allocator call will be clear() - not allocate() or deallocate()
bool can_clear()
{
assert(!_space_to_clear);
_space_to_clear.reset();
_space_to_clear.swap(_space);
if (_space_to_clear->count() == 1)
return true;
else
_space_to_clear.swap(_space);
return false;
}
void clear()
{
assert(!_space && _space_to_clear);
_space_to_clear.reset();
_space = new MSpace;
}
private:
spp_sptr<MSpace> _space;
spp_sptr<MSpace> _space_to_clear;
};
}
// allocators are "equal" whenever memory allocated with one can be deallocated with the other
template<class T>
inline bool operator==(const spp::spp_allocator<T> &a, const spp::spp_allocator<T> &b)
{
return a.space() == b.space();
}
template<class T>
inline bool operator!=(const spp::spp_allocator<T> &a, const spp::spp_allocator<T> &b)
{
return !(a == b);
}
namespace std
{
template <class T>
inline void swap(spp::spp_allocator<T> &a, spp::spp_allocator<T> &b)
{
a.swap(b);
}
}
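// A usage sketch (compiled out, illustrative only): spp_allocator is a
// drop-in STL allocator whose storage comes from a private, reference-counted
// mspace. Copies share the same space, which is why operator== above compares
// space() pointers. The can_clear()/clear() pair assumes this allocator is the
// sole owner of its space at the time of the call.
#if 0
#include <vector>
void spp_allocator_example()
{
    typedef spp::spp_allocator<int> alloc_t;
    alloc_t a;
    std::vector<int, alloc_t> v(a);   // vector memory comes from a's mspace
    v.push_back(42);
    alloc_t b(a);                     // b shares a's space, so a == b holds
                                      // and b may deallocate a's memory
    // Once no other copy holds the space, the whole heap can be dropped in
    // one shot instead of freeing chunk by chunk:
    // if (a.can_clear())
    //     a.clear();
}
#endif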
#if !defined(SPP_EXCLUDE_IMPLEMENTATION)
#ifndef WIN32
#ifdef _WIN32
#define WIN32 1
#endif
#ifdef _WIN32_WCE
#define SPP_LACKS_FCNTL_H
#define WIN32 1
#endif
#endif
#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <tchar.h>
#define SPP_HAVE_MMAP 1
#define SPP_LACKS_UNISTD_H
#define SPP_LACKS_SYS_PARAM_H
#define SPP_LACKS_SYS_MMAN_H
#define SPP_LACKS_STRING_H
#define SPP_LACKS_STRINGS_H
#define SPP_LACKS_SYS_TYPES_H
#define SPP_LACKS_ERRNO_H
#define SPP_LACKS_SCHED_H
#ifndef SPP_MALLOC_FAILURE_ACTION
#define SPP_MALLOC_FAILURE_ACTION
#endif
#ifndef SPP_MMAP_CLEARS
#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
#define SPP_MMAP_CLEARS 0
#else
#define SPP_MMAP_CLEARS 1
#endif
#endif
#endif
#if defined(DARWIN) || defined(_DARWIN)
#define SPP_HAVE_MMAP 1
/* OSX allocators provide 16 byte alignment */
#ifndef SPP_MALLOC_ALIGNMENT
#define SPP_MALLOC_ALIGNMENT ((size_t)16U)
#endif
#endif
#ifndef SPP_LACKS_SYS_TYPES_H
#include <sys/types.h> /* For size_t */
#endif
#ifndef SPP_MALLOC_ALIGNMENT
#define SPP_MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
#endif
/* ------------------- size_t and alignment properties -------------------- */
static const size_t spp_max_size_t = ~(size_t)0;
static const size_t spp_size_t_bitsize = sizeof(size_t) << 3;
static const size_t spp_half_max_size_t = spp_max_size_t / 2U;
static const size_t spp_chunk_align_mask = SPP_MALLOC_ALIGNMENT - 1;
#if defined(SPP_DEBUG) || !defined(NDEBUG)
static bool spp_is_aligned(void *p) { return ((size_t)p & spp_chunk_align_mask) == 0; }
#endif
// the number of bytes to offset an address to align it
static size_t align_offset(void *p)
{
return (((size_t)p & spp_chunk_align_mask) == 0) ? 0 :
((SPP_MALLOC_ALIGNMENT - ((size_t)p & spp_chunk_align_mask)) & spp_chunk_align_mask);
}
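// Worked example, assuming the default 16-byte alignment of a 64-bit build:
//     align_offset((void *)0x1000) == 0    // already aligned
//     align_offset((void *)0x1003) == 13   // 13 bytes to the next boundary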
#ifndef SPP_FOOTERS
#define SPP_FOOTERS 0
#endif
#ifndef SPP_ABORT
#define SPP_ABORT abort()
#endif
#ifndef SPP_ABORT_ON_ASSERT_FAILURE
#define SPP_ABORT_ON_ASSERT_FAILURE 1
#endif
#ifndef SPP_PROCEED_ON_ERROR
#define SPP_PROCEED_ON_ERROR 0
#endif
#ifndef SPP_INSECURE
#define SPP_INSECURE 0
#endif
#ifndef SPP_MALLOC_INSPECT_ALL
#define SPP_MALLOC_INSPECT_ALL 0
#endif
#ifndef SPP_HAVE_MMAP
#define SPP_HAVE_MMAP 1
#endif
#ifndef SPP_MMAP_CLEARS
#define SPP_MMAP_CLEARS 1
#endif
#ifndef SPP_HAVE_MREMAP
#ifdef linux
#define SPP_HAVE_MREMAP 1
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* Turns on mremap() definition */
#endif
#else
#define SPP_HAVE_MREMAP 0
#endif
#endif
#ifndef SPP_MALLOC_FAILURE_ACTION
// ENOMEM = 12
#define SPP_MALLOC_FAILURE_ACTION errno = 12
#endif
#ifndef SPP_DEFAULT_GRANULARITY
#if defined(WIN32)
#define SPP_DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
#else
#define SPP_DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
#endif
#endif
#ifndef SPP_DEFAULT_TRIM_THRESHOLD
#define SPP_DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
#endif
#ifndef SPP_DEFAULT_MMAP_THRESHOLD
#if SPP_HAVE_MMAP
#define SPP_DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
#else
#define SPP_DEFAULT_MMAP_THRESHOLD spp_max_size_t
#endif
#endif
#ifndef SPP_MAX_RELEASE_CHECK_RATE
#if SPP_HAVE_MMAP
#define SPP_MAX_RELEASE_CHECK_RATE 4095
#else
#define SPP_MAX_RELEASE_CHECK_RATE spp_max_size_t
#endif
#endif
#ifndef SPP_USE_BUILTIN_FFS
#define SPP_USE_BUILTIN_FFS 0
#endif
#ifndef SPP_USE_DEV_RANDOM
#define SPP_USE_DEV_RANDOM 0
#endif
#ifndef SPP_NO_SEGMENT_TRAVERSAL
#define SPP_NO_SEGMENT_TRAVERSAL 0
#endif
/*------------------------------ internal #includes ---------------------- */
#ifdef _MSC_VER
#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
#endif
#ifndef SPP_LACKS_ERRNO_H
#include <errno.h> /* for SPP_MALLOC_FAILURE_ACTION */
#endif
#ifdef SPP_DEBUG
#if SPP_ABORT_ON_ASSERT_FAILURE
#undef assert
#define assert(x) if(!(x)) SPP_ABORT
#else
#include <assert.h>
#endif
#else
#ifndef assert
#define assert(x)
#endif
#define SPP_DEBUG 0
#endif
#if !defined(WIN32) && !defined(SPP_LACKS_TIME_H)
#include <time.h> /* for magic initialization */
#endif
#ifndef SPP_LACKS_STDLIB_H
#include <stdlib.h> /* for abort() */
#endif
#ifndef SPP_LACKS_STRING_H
#include <string.h> /* for memset etc */
#endif
#if SPP_USE_BUILTIN_FFS
#ifndef SPP_LACKS_STRINGS_H
#include <strings.h> /* for ffs */
#endif
#endif
#if SPP_HAVE_MMAP
#ifndef SPP_LACKS_SYS_MMAN_H
/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
#if (defined(linux) && !defined(__USE_GNU))
#define __USE_GNU 1
#include <sys/mman.h> /* for mmap */
#undef __USE_GNU
#else
#include <sys/mman.h> /* for mmap */
#endif
#endif
#ifndef SPP_LACKS_FCNTL_H
#include <fcntl.h>
#endif
#endif
#ifndef SPP_LACKS_UNISTD_H
#include <unistd.h> /* for sbrk, sysconf */
#else
#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
extern void* sbrk(ptrdiff_t);
#endif
#endif
#include <new>
namespace spp
{
/* Declarations for bit scanning on win32 */
#if defined(_MSC_VER) && _MSC_VER>=1300
#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */
extern "C" {
unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
}
#define BitScanForward _BitScanForward
#define BitScanReverse _BitScanReverse
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* BitScanForward */
#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
#ifndef WIN32
#ifndef malloc_getpagesize
#ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
#ifndef _SC_PAGE_SIZE
#define _SC_PAGE_SIZE _SC_PAGESIZE
#endif
#endif
#ifdef _SC_PAGE_SIZE
#define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
#else
#if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
extern size_t getpagesize();
#define malloc_getpagesize getpagesize()
#else
#ifdef WIN32 /* use supplied emulation of getpagesize */
#define malloc_getpagesize getpagesize()
#else
#ifndef SPP_LACKS_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef EXEC_PAGESIZE
#define malloc_getpagesize EXEC_PAGESIZE
#else
#ifdef NBPG
#ifndef CLSIZE
#define malloc_getpagesize NBPG
#else
#define malloc_getpagesize (NBPG * CLSIZE)
#endif
#else
#ifdef NBPC
#define malloc_getpagesize NBPC
#else
#ifdef PAGESIZE
#define malloc_getpagesize PAGESIZE
#else /* just guess */
#define malloc_getpagesize ((size_t)4096U)
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
#endif
/* -------------------------- MMAP preliminaries ------------------------- */
/*
If SPP_HAVE_MORECORE or SPP_HAVE_MMAP is false, we just define calls and
checks to fail so that the compiler optimizer can delete dead code rather
than relying on so many "#if"s.
*/
/* MMAP must return mfail on failure */
static void *mfail = (void*)spp_max_size_t;
static char *cmfail = (char*)mfail;
#if SPP_HAVE_MMAP
#ifndef WIN32
#define SPP_MUNMAP_DEFAULT(a, s) munmap((a), (s))
#define SPP_MMAP_PROT (PROT_READ | PROT_WRITE)
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif
#ifdef MAP_ANONYMOUS
#define SPP_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#define SPP_MMAP_DEFAULT(s) mmap(0, (s), SPP_MMAP_PROT, SPP_MMAP_FLAGS, -1, 0)
#else /* MAP_ANONYMOUS */
/*
Nearly all versions of mmap support MAP_ANONYMOUS, so the following
is unlikely to be needed, but is supplied just in case.
*/
#define SPP_MMAP_FLAGS (MAP_PRIVATE)
static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
static void* SPP_MMAP_DEFAULT(size_t s)
{
    if (dev_zero_fd < 0)
        dev_zero_fd = open("/dev/zero", O_RDWR);
    return mmap(0, s, SPP_MMAP_PROT, SPP_MMAP_FLAGS, dev_zero_fd, 0);
}
#endif /* MAP_ANONYMOUS */
#define SPP_DIRECT_MMAP_DEFAULT(s) SPP_MMAP_DEFAULT(s)
#else /* WIN32 */
/* Win32 MMAP via VirtualAlloc */
static SPP_FORCEINLINE void* win32mmap(size_t size)
{
void* ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
return (ptr != 0) ? ptr : mfail;
}
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static SPP_FORCEINLINE void* win32direct_mmap(size_t size)
{
void* ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN,
PAGE_READWRITE);
return (ptr != 0) ? ptr : mfail;
}
/* This function supports releasing coalesced segments */
static SPP_FORCEINLINE int win32munmap(void* ptr, size_t size)
{
MEMORY_BASIC_INFORMATION minfo;
char* cptr = (char*)ptr;
while (size)
{
if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
return -1;
if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
minfo.State != MEM_COMMIT || minfo.RegionSize > size)
return -1;
if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
return -1;
cptr += minfo.RegionSize;
size -= minfo.RegionSize;
}
return 0;
}
#define SPP_MMAP_DEFAULT(s) win32mmap(s)
#define SPP_MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
#define SPP_DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
#endif /* WIN32 */
#endif /* SPP_HAVE_MMAP */
#if SPP_HAVE_MREMAP
#ifndef WIN32
#define SPP_MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
#endif
#endif
/**
* Define SPP_CALL_MMAP/SPP_CALL_MUNMAP/SPP_CALL_DIRECT_MMAP
*/
#if SPP_HAVE_MMAP
#define USE_MMAP_BIT 1
#ifdef SPP_MMAP
#define SPP_CALL_MMAP(s) SPP_MMAP(s)
#else
#define SPP_CALL_MMAP(s) SPP_MMAP_DEFAULT(s)
#endif
#ifdef SPP_MUNMAP
#define SPP_CALL_MUNMAP(a, s) SPP_MUNMAP((a), (s))
#else
#define SPP_CALL_MUNMAP(a, s) SPP_MUNMAP_DEFAULT((a), (s))
#endif
#ifdef SPP_DIRECT_MMAP
#define SPP_CALL_DIRECT_MMAP(s) SPP_DIRECT_MMAP(s)
#else
#define SPP_CALL_DIRECT_MMAP(s) SPP_DIRECT_MMAP_DEFAULT(s)
#endif
#else /* SPP_HAVE_MMAP */
#define USE_MMAP_BIT 0
#define SPP_MMAP(s) mfail
#define SPP_MUNMAP(a, s) (-1)
#define SPP_DIRECT_MMAP(s) mfail
#define SPP_CALL_DIRECT_MMAP(s) SPP_DIRECT_MMAP(s)
#define SPP_CALL_MMAP(s) SPP_MMAP(s)
#define SPP_CALL_MUNMAP(a, s) SPP_MUNMAP((a), (s))
#endif
/**
* Define SPP_CALL_MREMAP
*/
#if SPP_HAVE_MMAP && SPP_HAVE_MREMAP
#ifdef MREMAP
#define SPP_CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
#else
#define SPP_CALL_MREMAP(addr, osz, nsz, mv) SPP_MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
#endif
#else
#define SPP_CALL_MREMAP(addr, osz, nsz, mv) mfail
#endif
/* mstate bit set if contiguous morecore disabled or failed */
static const unsigned USE_NONCONTIGUOUS_BIT = 4U;
/* segment bit set in create_mspace_with_base */
static const unsigned EXTERN_BIT = 8U;
/* --------------------------- flags ------------------------ */
static const unsigned PINUSE_BIT = 1;
static const unsigned CINUSE_BIT = 2;
static const unsigned FLAG4_BIT = 4;
static const unsigned INUSE_BITS = (PINUSE_BIT | CINUSE_BIT);
static const unsigned FLAG_BITS = (PINUSE_BIT | CINUSE_BIT | FLAG4_BIT);
/* ------------------- Chunks sizes and alignments ----------------------- */
#if SPP_FOOTERS
static const unsigned CHUNK_OVERHEAD = 2 * sizeof(size_t);
#else
static const unsigned CHUNK_OVERHEAD = sizeof(size_t);
#endif
/* MMapped chunks need a second word of overhead ... */
static const unsigned SPP_MMAP_CHUNK_OVERHEAD = 2 * sizeof(size_t);
/* ... and additional padding for fake next-chunk at foot */
static const unsigned SPP_MMAP_FOOT_PAD = 4 * sizeof(size_t);
// ===============================================================================
struct malloc_chunk_header
{
void set_size_and_pinuse_of_free_chunk(size_t s)
{
_head = s | PINUSE_BIT;
set_foot(s);
}
void set_foot(size_t s)
{
((malloc_chunk_header *)((char*)this + s))->_prev_foot = s;
}
// extraction of fields from head words
bool cinuse() const { return !!(_head & CINUSE_BIT); }
bool pinuse() const { return !!(_head & PINUSE_BIT); }
bool flag4inuse() const { return !!(_head & FLAG4_BIT); }
bool is_inuse() const { return (_head & INUSE_BITS) != PINUSE_BIT; }
bool is_mmapped() const { return (_head & INUSE_BITS) == 0; }
size_t chunksize() const { return _head & ~(FLAG_BITS); }
void clear_pinuse() { _head &= ~PINUSE_BIT; }
void set_flag4() { _head |= FLAG4_BIT; }
void clear_flag4() { _head &= ~FLAG4_BIT; }
// Treat space at ptr +/- offset as a chunk
malloc_chunk_header * chunk_plus_offset(size_t s)
{
return (malloc_chunk_header *)((char*)this + s);
}
malloc_chunk_header * chunk_minus_offset(size_t s)
{
return (malloc_chunk_header *)((char*)this - s);
}
// Ptr to next or previous physical malloc_chunk.
malloc_chunk_header * next_chunk()
{
return (malloc_chunk_header *)((char*)this + (_head & ~FLAG_BITS));
}
malloc_chunk_header * prev_chunk()
{
return (malloc_chunk_header *)((char*)this - (_prev_foot));
}
// extract next chunk's pinuse bit
size_t next_pinuse() { return next_chunk()->_head & PINUSE_BIT; }
size_t _prev_foot; // Size of previous chunk (if free).
size_t _head; // Size and inuse bits.
};
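// Example of the _head encoding above: a 48-byte chunk that is itself in use
// and whose previous neighbor is also in use stores
//     _head == 48 | CINUSE_BIT | PINUSE_BIT   // == 51
// chunksize() masks off FLAG_BITS to recover 48, and is_mmapped() is false
// because the two inuse bits are not both clear.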
// ===============================================================================
struct malloc_chunk : public malloc_chunk_header
{
// Set size, pinuse bit, foot, and clear next pinuse
void set_free_with_pinuse(size_t s, malloc_chunk* n)
{
n->clear_pinuse();
set_size_and_pinuse_of_free_chunk(s);
}
// Get the internal overhead associated with chunk p
size_t overhead_for() { return is_mmapped() ? SPP_MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD; }
// Return true if malloced space is not necessarily cleared
bool calloc_must_clear()
{
#if SPP_MMAP_CLEARS
return !is_mmapped();
#else
return true;
#endif
}
struct malloc_chunk* _fd; // double links -- used only if free.
struct malloc_chunk* _bk;
};
static const unsigned MCHUNK_SIZE = sizeof(malloc_chunk);
/* The smallest size we can malloc is an aligned minimal chunk */
static const unsigned MIN_CHUNK_SIZE = (MCHUNK_SIZE + spp_chunk_align_mask) & ~spp_chunk_align_mask;
typedef malloc_chunk mchunk;
typedef malloc_chunk* mchunkptr;
typedef malloc_chunk_header *hchunkptr;
typedef malloc_chunk* sbinptr; // The type of bins of chunks
typedef unsigned int bindex_t; // Described below
typedef unsigned int binmap_t; // Described below
typedef unsigned int flag_t; // The type of various bit flag sets
// conversion from malloc headers to user pointers, and back
static SPP_FORCEINLINE void *chunk2mem(const void *p) { return (void *)((char *)p + 2 * sizeof(size_t)); }
static SPP_FORCEINLINE mchunkptr mem2chunk(const void *mem) { return (mchunkptr)((char *)mem - 2 * sizeof(size_t)); }
// chunk associated with aligned address A
static SPP_FORCEINLINE mchunkptr align_as_chunk(char *A) { return (mchunkptr)(A + align_offset(chunk2mem(A))); }
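// The user pointer handed to callers sits just past the two header words
// (_prev_foot and _head), so the two conversions are exact inverses:
//     mem2chunk(chunk2mem(p)) == p    for any chunk pointer p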
// Bounds on request (not chunk) sizes.
static const unsigned MAX_REQUEST = (-MIN_CHUNK_SIZE) << 2;
static const unsigned MIN_REQUEST = MIN_CHUNK_SIZE - CHUNK_OVERHEAD - 1;
// pad request bytes into a usable size
static SPP_FORCEINLINE size_t pad_request(size_t req)
{
return (req + CHUNK_OVERHEAD + spp_chunk_align_mask) & ~spp_chunk_align_mask;
}
// pad request, checking for minimum (but not maximum)
static SPP_FORCEINLINE size_t request2size(size_t req)
{
return req < MIN_REQUEST ? MIN_CHUNK_SIZE : pad_request(req);
}
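// Worked example on a 64-bit build without footers (sizeof(size_t) == 8,
// CHUNK_OVERHEAD == 8, 16-byte alignment, MIN_CHUNK_SIZE == 32):
//     pad_request(20)  == (20 + 8 + 15) & ~15 == 32
//     request2size(1)  == 32   // below MIN_REQUEST (23), bumped to MIN_CHUNK_SIZE
//     request2size(25) == (25 + 8 + 15) & ~15 == 48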
/* ------------------ Operations on head and foot fields ----------------- */
/*
The head field of a chunk is or'ed with PINUSE_BIT when previous
adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
use, unless mmapped, in which case both bits are cleared.
FLAG4_BIT is not used by this malloc, but might be useful in extensions.
*/
// Head value for fenceposts
static const unsigned FENCEPOST_HEAD = INUSE_BITS | sizeof(size_t);
/* ---------------------- Overlaid data structures ----------------------- */
/*
When chunks are not in use, they are treated as nodes of either
lists or trees.
"Small" chunks are stored in circular doubly-linked lists, and look
like this:
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of previous chunk |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`head:' | Size of chunk, in bytes |P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Forward pointer to next chunk in list |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Back pointer to previous chunk in list |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Unused space (may be 0 bytes long) .
. .
. |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`foot:' | Size of chunk, in bytes |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Larger chunks are kept in a form of bitwise digital trees (aka
tries) keyed on chunksizes. Because malloc_tree_chunks are only for
free chunks greater than 256 bytes, their size doesn't impose any
constraints on user chunk sizes. Each node looks like:
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of previous chunk |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`head:' | Size of chunk, in bytes |P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Forward pointer to next chunk of same size |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Back pointer to previous chunk of same size |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Pointer to left child (child[0]) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Pointer to right child (child[1]) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Pointer to parent |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| bin index of this chunk |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Unused space .
. |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`foot:' | Size of chunk, in bytes |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Each tree holding treenodes is a tree of unique chunk sizes. Chunks
of the same size are arranged in a circularly-linked list, with only
the oldest chunk (the next to be used, in our FIFO ordering)
actually in the tree. (Tree members are distinguished by a non-null
parent pointer.) If a chunk with the same size as an existing node
is inserted, it is linked off the existing node using pointers that
work in the same way as fd/bk pointers of small chunks.
Each tree contains a power of 2 sized range of chunk sizes (the
smallest is 0x100 <= x < 0x180), which is divided in half at each
tree level, with the chunks in the smaller half of the range (0x100
<= x < 0x140 for the top node) in the left subtree and the larger
half (0x140 <= x < 0x180) in the right subtree. This is, of course,
done by inspecting individual bits.
Using these rules, each node's left subtree contains all smaller
sizes than its right subtree. However, the node at the root of each
subtree has no particular ordering relationship to either. (The
dividing line between the subtree sizes is based on trie relation.)
If we remove the last chunk of a given size from the interior of the
tree, we need to replace it with a leaf node. The tree ordering
rules permit a node to be replaced by any leaf below it.
The smallest chunk in a tree (a common operation in a best-fit
allocator) can be found by walking a path to the leftmost leaf in
the tree. Unlike a usual binary tree, where we follow left child
pointers until we reach a null, here we follow the right child
pointer any time the left one is null, until we reach a leaf with
both child pointers null. The smallest chunk in the tree will be
somewhere along that path.
The worst case number of steps to add, find, or remove a node is
bounded by the number of bits differentiating chunks within
bins. Under current bin calculations, this ranges from 6 up to 21
(for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
is of course much better.
*/
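/*
Example of the size splitting described above: the tree covering sizes
0x100 <= x < 0x180 branches on bit 0x40 at its root, so a free chunk of
size 0x128 (bit 0x40 clear) descends into the left subtree
(0x100 <= x < 0x140), while one of size 0x150 (bit 0x40 set) descends
into the right subtree (0x140 <= x < 0x180).
*/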
// ===============================================================================
struct malloc_tree_chunk : public malloc_chunk_header
{
malloc_tree_chunk *leftmost_child()
{
return _child[0] ? _child[0] : _child[1];
}
malloc_tree_chunk* _fd;
malloc_tree_chunk* _bk;
malloc_tree_chunk* _child[2];
malloc_tree_chunk* _parent;
bindex_t _index;
};
typedef malloc_tree_chunk tchunk;
typedef malloc_tree_chunk* tchunkptr;
typedef malloc_tree_chunk* tbinptr; // The type of bins of trees
/* ----------------------------- Segments -------------------------------- */
/*
Each malloc space may include non-contiguous segments, held in a
list headed by an embedded malloc_segment record representing the
top-most space. Segments also include flags holding properties of
the space. Large chunks that are directly allocated by mmap are not
included in this list. They are instead independently created and
destroyed without otherwise keeping track of them.
Segment management mainly comes into play for spaces allocated by
MMAP. Any call to MMAP might or might not return memory that is
adjacent to an existing segment. MORECORE normally contiguously
extends the current space, so this space is almost always adjacent,
which is simpler and faster to deal with. (This is why MORECORE is
used preferentially to MMAP when both are available -- see
sys_alloc.) When allocating using MMAP, we don't use any of the
hinting mechanisms (inconsistently) supported in various
implementations of unix mmap, or distinguish reserving from
committing memory. Instead, we just ask for space, and exploit
contiguity when we get it. It is probably possible to do
better than this on some systems, but no general scheme seems
to be significantly better.
Management entails a simpler variant of the consolidation scheme
used for chunks to reduce fragmentation -- new adjacent memory is
normally prepended or appended to an existing segment. However,
there are limitations compared to chunk consolidation that mostly
reflect the fact that segment processing is relatively infrequent
(occurring only when getting memory from system) and that we
don't expect to have huge numbers of segments:
* Segments are not indexed, so traversal requires linear scans. (It
would be possible to index these, but is not worth the extra
overhead and complexity for most programs on most platforms.)
* New segments are only appended to old ones when holding top-most
memory; if they cannot be prepended to others, they are held in
different segments.
Except for the top-most segment of an mstate, each segment record
is kept at the tail of its segment. Segments are added by pushing
segment records onto the list headed by &mstate.seg for the
containing mstate.
Segment flags control allocation/merge/deallocation policies:
* If EXTERN_BIT set, then we did not allocate this segment,
and so should not try to deallocate or merge with others.
(This currently holds only for the initial segment passed
into create_mspace_with_base.)
* If USE_MMAP_BIT set, the segment may be merged with
other surrounding mmapped segments and trimmed/de-allocated
using munmap.
* If neither bit is set, then the segment was obtained using
MORECORE so can be merged with surrounding MORECORE'd segments
and deallocated/trimmed using MORECORE with negative arguments.
*/
// ===============================================================================
struct malloc_segment
{
bool is_mmapped_segment() { return !!(_sflags & USE_MMAP_BIT); }
bool is_extern_segment() { return !!(_sflags & EXTERN_BIT); }
char* _base; // base address
size_t _size; // allocated size
malloc_segment* _next; // ptr to next segment
flag_t _sflags; // mmap and extern flag
};
typedef malloc_segment msegment;
typedef malloc_segment* msegmentptr;
/* ------------- Malloc_params ------------------- */
/*
malloc_params holds global properties, including those that can be
dynamically set using mallopt. There is a single instance, mparams,
initialized in init_mparams. Note that the non-zeroness of "magic"
also serves as an initialization flag.
*/