summaryrefslogtreecommitdiff
path: root/libc/stdlib/malloc-standard/malloc.h
diff options
context:
space:
mode:
authorEric Andersen <andersen@codepoet.org>2003-12-30 10:40:49 +0000
committerEric Andersen <andersen@codepoet.org>2003-12-30 10:40:49 +0000
commit8d532c51318bad2436880ecac972c9dfa3996c9b (patch)
tree821863358734242feb99643e9d66ee9b175ad464 /libc/stdlib/malloc-standard/malloc.h
parent4c9086ee4afde4257a4b4a8f55e05932d1b6acfd (diff)
Rework malloc. The new default implementation is based on dlmalloc from Doug
Lea. It is about 2x faster than the old malloc-930716, and behave itself much better -- it will properly release memory back to the system, and it uses a combination of brk() for small allocations and mmap() for larger allocations. -Erik
Diffstat (limited to 'libc/stdlib/malloc-standard/malloc.h')
-rw-r--r--libc/stdlib/malloc-standard/malloc.h953
1 files changed, 953 insertions, 0 deletions
diff --git a/libc/stdlib/malloc-standard/malloc.h b/libc/stdlib/malloc-standard/malloc.h
new file mode 100644
index 000000000..46858332d
--- /dev/null
+++ b/libc/stdlib/malloc-standard/malloc.h
@@ -0,0 +1,953 @@
+/*
+ This is a version (aka dlmalloc) of malloc/free/realloc written by
+ Doug Lea and released to the public domain. Use, modify, and
+ redistribute this code without permission or acknowledgement in any
+ way you wish. Send questions, comments, complaints, performance
+ data, etc to dl@cs.oswego.edu
+
+ VERSION 2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
+
+ Note: There may be an updated version of this malloc obtainable at
+ ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+ Check before installing!
+
+ Hacked up for uClibc by Erik Andersen <andersen@codepoet.org>
+*/
+
+#include <features.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <malloc.h>
+
+
+#ifdef __UCLIBC_HAS_THREADS__
+#include <pthread.h>
+extern pthread_mutex_t __malloc_lock;
+# define LOCK __pthread_mutex_lock(&__malloc_lock)
+# define UNLOCK __pthread_mutex_unlock(&__malloc_lock);
+#else
+# define LOCK
+# define UNLOCK
+#endif
+
+
+
+/*
+ MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+ It must be a power of two at least 2 * (sizeof(size_t)), even on machines
+ for which smaller alignments would suffice. It may be defined as
+ larger than this though. Note however that code and data structures
+ are optimized for the case of 8-byte alignment.
+*/
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT (2 * (sizeof(size_t)))
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
+
+/*
+ TRIM_FASTBINS controls whether free() of a very small chunk can
+ immediately lead to trimming. Setting to true (1) can reduce memory
+ footprint, but will almost always slow down programs that use a lot
+ of small chunks.
+
+ Define this only if you are willing to give up some speed to more
+ aggressively reduce system-level memory footprint when releasing
+ memory in programs that use many small chunks. You can get
+ essentially the same effect by setting MXFAST to 0, but this can
+ lead to even greater slowdowns in programs using many small chunks.
+ TRIM_FASTBINS is an in-between compile-time option, that disables
+ only those chunks bordering topmost memory from being placed in
+ fastbins.
+*/
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS 0
+#endif
+
+
+/*
+ MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+/*
+ MORECORE is the name of the routine to call to obtain more memory
+ from the system. See below for general guidance on writing
+ alternative MORECORE functions, as well as a version for WIN32 and a
+ sample version for pre-OSX macos.
+*/
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+/*
+ MORECORE_FAILURE is the value returned upon failure of MORECORE
+ as well as mmap. Since it cannot be an otherwise valid memory address,
+ and must reflect values of standard sys calls, you probably ought not
+ try to redefine it.
+*/
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE (-1)
+#endif
+
+/*
+ If MORECORE_CONTIGUOUS is true, take advantage of fact that
+ consecutive calls to MORECORE with positive arguments always return
+ contiguous increasing addresses. This is true of unix sbrk. Even
+ if not defined, when regions happen to be contiguous, malloc will
+ permit allocations spanning regions obtained from different
+ calls. But defining this when applicable enables some stronger
+ consistency checks and space efficiencies.
+*/
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+ MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+ sbrk fails, and mmap is used as a backup (which is done only if
+ HAVE_MMAP). The value must be a multiple of page size. This
+ backup strategy generally applies only when systems have "holes" in
+ address space, so sbrk cannot perform contiguous expansion, but
+ there is still space available on system. On systems for which
+ this is known to be useful (i.e. most linux kernels), this occurs
+ only when programs allocate huge amounts of memory. Between this,
+ and the fact that mmap regions tend to be limited, the size should
+ be large, to avoid too many mmap calls and thus avoid running out
+ of kernel resources.
+*/
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+/*
+ The system page size. To the extent possible, this malloc manages
+ memory from the system in page-size units. Note that this value is
+ cached during initialization into a field of malloc_state. So even
+ if malloc_getpagesize is a function, it is only called once.
+
+ The following mechanics for getpagesize were adapted from bsd/gnu
+ getpagesize.h. If none of the system-probes here apply, a value of
+ 4096 is used, which should be OK: If they don't apply, then using
+ the actual value probably doesn't impact performance.
+*/
+#ifndef malloc_getpagesize
+# include <unistd.h>
+# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+#else /* just guess */
+# define malloc_getpagesize (4096)
+#endif
+
+
+/* mallopt tuning options */
+
+/*
+ M_MXFAST is the maximum request size used for "fastbins", special bins
+ that hold returned chunks without consolidating their spaces. This
+ enables future requests for chunks of the same size to be handled
+ very quickly, but can increase fragmentation, and thus increase the
+ overall memory footprint of a program.
+
+ This malloc manages fastbins very conservatively yet still
+ efficiently, so fragmentation is rarely a problem for values less
+ than or equal to the default. The maximum supported value of MXFAST
+ is 80. You wouldn't want it any higher than this anyway. Fastbins
+ are designed especially for use with many small structs, objects or
+ strings -- the default handles structs/objects/arrays with sizes up
+ to 16 4byte fields, or small strings representing words, tokens,
+ etc. Using fastbins for larger objects normally worsens
+ fragmentation without improving speed.
+
+ M_MXFAST is set in REQUEST size units. It is internally used in
+ chunksize units, which adds padding and alignment. You can reduce
+ M_MXFAST to 0 to disable all use of fastbins. This causes the malloc
+ algorithm to be a closer approximation of fifo-best-fit in all cases,
+ not just for larger requests, but will generally cause it to be
+ slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST 1
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST 64
+#endif
+
+
+/*
+ M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+ to keep before releasing via malloc_trim in free().
+
+ Automatic trimming is mainly useful in long-lived programs.
+ Because trimming via sbrk can be slow on some systems, and can
+ sometimes be wasteful (in cases where programs immediately
+ afterward allocate more large chunks) the value should be high
+ enough so that your overall system performance would improve by
+ releasing this much memory.
+
+ The trim threshold and the mmap control parameters (see below)
+ can be traded off with one another. Trimming and mmapping are
+ two different ways of releasing unused memory back to the
+ system. Between these two, it is often possible to keep
+ system-level demands of a long-lived program down to a bare
+ minimum. For example, in one test suite of sessions measuring
+ the XF86 X server on Linux, using a trim threshold of 128K and a
+ mmap threshold of 192K led to near-minimal long term resource
+ consumption.
+
+ If you are using this malloc in a long-lived program, it should
+ pay to experiment with these values. As a rough guide, you
+ might set to a value close to the average size of a process
+ (program) running on your system. Releasing this much memory
+ would allow such a process to run in memory. Generally, it's
+ worth it to tune for trimming rather tham memory mapping when a
+ program undergoes phases where several large chunks are
+ allocated and released in ways that can reuse each other's
+ storage, perhaps mixed with phases where there are no such
+ chunks at all. And in well-behaved long-lived programs,
+ controlling release of large blocks via trimming versus mapping
+ is usually faster.
+
+ However, in most programs, these parameters serve mainly as
+ protection against the system-level effects of carrying around
+ massive amounts of unneeded memory. Since frequent calls to
+ sbrk, mmap, and munmap otherwise degrade performance, the default
+ parameters are set to relatively high values that serve only as
+ safeguards.
+
+ The trim value must be greater than page size to have any useful
+ effect. To disable trimming completely, you can set to
+ (unsigned long)(-1)
+
+ Trim settings interact with fastbin (MXFAST) settings: Unless
+ TRIM_FASTBINS is defined, automatic trimming never takes place upon
+ freeing a chunk with size less than or equal to MXFAST. Trimming is
+ instead delayed until subsequent freeing of larger chunks. However,
+ you can still force an attempted trim by calling malloc_trim.
+
+ Also, trimming is not generally possible in cases where
+ the main arena is obtained via mmap.
+
+ Note that the trick some people use of mallocing a huge space and
+ then freeing it at program startup, in an attempt to reserve system
+ memory, doesn't have the intended effect under automatic trimming,
+ since that memory will immediately be returned to the system.
+*/
+#define M_TRIM_THRESHOLD -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (256 * 1024)
+#endif
+
+/*
+ M_TOP_PAD is the amount of extra `padding' space to allocate or
+ retain whenever sbrk is called. It is used in two ways internally:
+
+ * When sbrk is called to extend the top of the arena to satisfy
+ a new malloc request, this much padding is added to the sbrk
+ request.
+
+ * When malloc_trim is called automatically from free(),
+ it is used as the `pad' argument.
+
+ In both cases, the actual amount of padding is rounded
+ so that the end of the arena is always a system page boundary.
+
+ The main reason for using padding is to avoid calling sbrk so
+ often. Having even a small pad greatly reduces the likelihood
+ that nearly every malloc request during program start-up (or
+ after trimming) will invoke sbrk, which needlessly wastes
+ time.
+
+ Automatic rounding-up to page-size units is normally sufficient
+ to avoid measurable overhead, so the default is 0. However, in
+ systems where sbrk is relatively slow, it can pay to increase
+ this value, at the expense of carrying around more memory than
+ the program needs.
+*/
+#define M_TOP_PAD -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD (0)
+#endif
+
+/*
+ M_MMAP_THRESHOLD is the request size threshold for using mmap()
+ to service a request. Requests of at least this size that cannot
+ be allocated using already-existing space will be serviced via mmap.
+ (If enough normal freed space already exists it is used instead.)
+
+ Using mmap segregates relatively large chunks of memory so that
+ they can be individually obtained and released from the host
+ system. A request serviced through mmap is never reused by any
+ other request (at least not directly; the system may just so
+ happen to remap successive requests to the same locations).
+
+ Segregating space in this way has the benefits that:
+
+ 1. Mmapped space can ALWAYS be individually released back
+ to the system, which helps keep the system level memory
+ demands of a long-lived program low.
+ 2. Mapped memory can never become `locked' between
+ other chunks, as can happen with normally allocated chunks, which
+ means that even trimming via malloc_trim would not release them.
+ 3. On some systems with "holes" in address spaces, mmap can obtain
+ memory that sbrk cannot.
+
+ However, it has the disadvantages that:
+
+ 1. The space cannot be reclaimed, consolidated, and then
+ used to service later requests, as happens with normal chunks.
+ 2. It can lead to more wastage because of mmap page alignment
+ requirements
+ 3. It causes malloc performance to be more dependent on host
+ system memory management support routines which may vary in
+ implementation quality and may impose arbitrary
+ limitations. Generally, servicing a request via normal
+ malloc steps is faster than going through a system's mmap.
+
+ The advantages of mmap nearly always outweigh disadvantages for
+ "large" chunks, but the value of "large" varies across systems. The
+ default is an empirically derived value that works well in most
+ systems.
+*/
+#define M_MMAP_THRESHOLD -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
+#endif
+
+/*
+ M_MMAP_MAX is the maximum number of requests to simultaneously
+ service using mmap. This parameter exists because
+. Some systems have a limited number of internal tables for
+ use by mmap, and using more than a few of them may degrade
+ performance.
+
+ The default is set to a value that serves only as a safeguard.
+ Setting to 0 disables use of mmap for servicing large requests. If
+ HAVE_MMAP is not set, the default value is 0, and attempts to set it
+ to non-zero values in mallopt will fail.
+*/
+#define M_MMAP_MAX -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX (65536)
+#endif
+
+
+/* ------------------ MMAP support ------------------ */
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#define MMAP(addr, size, prot, flags) \
+ (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0))
+
+
+/* ----------------------- Chunk representations ----------------------- */
+
+
+/*
+ This struct declaration is misleading (but accurate and necessary).
+ It declares a "view" into memory allowing access to necessary
+ fields at known offsets from a given base. See explanation below.
+*/
+
+struct malloc_chunk {
+
+ size_t prev_size; /* Size of previous chunk (if free). */
+ size_t size; /* Size in bytes, including overhead. */
+
+ struct malloc_chunk* fd; /* double links -- used only if free. */
+ struct malloc_chunk* bk;
+};
+
+
+typedef struct malloc_chunk* mchunkptr;
+
+/*
+ malloc_chunk details:
+
+ (The following includes lightly edited explanations by Colin Plumb.)
+
+ Chunks of memory are maintained using a `boundary tag' method as
+ described in e.g., Knuth or Standish. (See the paper by Paul
+ Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
+ survey of such techniques.) Sizes of free chunks are stored both
+ in the front of each chunk and at the end. This makes
+ consolidating fragmented chunks into bigger chunks very fast. The
+ size fields also hold bits representing whether chunks are free or
+ in use.
+
+ An allocated chunk looks like this:
+
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk, if allocated | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of chunk, in bytes |P|
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | User data starts here... .
+ . .
+ . (malloc_usable_space() bytes) .
+ . |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+ Where "chunk" is the front of the chunk for the purpose of most of
+ the malloc code, but "mem" is the pointer that is returned to the
+ user. "Nextchunk" is the beginning of the next contiguous chunk.
+
+ Chunks always begin on even word boundries, so the mem portion
+ (which is returned to the user) is also on an even word boundary, and
+ thus at least double-word aligned.
+
+ Free chunks are stored in circular doubly-linked lists, and look like this:
+
+ chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Size of previous chunk |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `head:' | Size of chunk, in bytes |P|
+ mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Forward pointer to next chunk in list |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Back pointer to previous chunk in list |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Unused space (may be 0 bytes long) .
+ . .
+ . |
+nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ `foot:' | Size of chunk, in bytes |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The P (PREV_INUSE) bit, stored in the unused low-order bit of the
+ chunk size (which is always a multiple of two words), is an in-use
+ bit for the *previous* chunk. If that bit is *clear*, then the
+ word before the current chunk size contains the previous chunk
+ size, and can be used to find the front of the previous chunk.
+ The very first chunk allocated always has this bit set,
+ preventing access to non-existent (or non-owned) memory. If
+ prev_inuse is set for any given chunk, then you CANNOT determine
+ the size of the previous chunk, and might even get a memory
+ addressing fault when trying to do so.
+
+ Note that the `foot' of the current chunk is actually represented
+ as the prev_size of the NEXT chunk. This makes it easier to
+ deal with alignments etc but can be very confusing when trying
+ to extend or adapt this code.
+
+ The two exceptions to all this are
+
+ 1. The special chunk `top' doesn't bother using the
+ trailing size field since there is no next contiguous chunk
+ that would have to index off it. After initialization, `top'
+ is forced to always exist. If it would become less than
+ MINSIZE bytes long, it is replenished.
+
+ 2. Chunks allocated via mmap, which have the second-lowest-order
+ bit (IS_MMAPPED) set in their size fields. Because they are
+ allocated one-by-one, each must contain its own trailing size field.
+
+*/
+
+/*
+ ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+
+#define chunk2mem(p) ((void*)((char*)(p) + 2*(sizeof(size_t))))
+#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*(sizeof(size_t))))
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk))
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE \
+ (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m) (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0)
+
+
+/* Check if a request is so large that it would wrap around zero when
+ padded and aligned. To simplify some other code, the bound is made
+ low enough so that adding MINSIZE will also not wrap around sero.
+*/
+
+#define REQUEST_OUT_OF_RANGE(req) \
+ ((unsigned long)(req) >= \
+ (unsigned long)(size_t)(-2 * MINSIZE))
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req) \
+ (((req) + (sizeof(size_t)) + MALLOC_ALIGN_MASK < MINSIZE) ? \
+ MINSIZE : \
+ ((req) + (sizeof(size_t)) + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
+
+/* Same, except also perform argument check */
+
+#define checked_request2size(req, sz) \
+ if (REQUEST_OUT_OF_RANGE(req)) { \
+ errno = ENOMEM; \
+ return 0; \
+ } \
+ (sz) = request2size(req);
+
+/*
+ --------------- Physical chunk operations ---------------
+*/
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p) ((p)->size & PREV_INUSE)
+
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
+
+/* Bits to mask off when extracting size
+
+ Note: IS_MMAPPED is intentionally not masked off from size field in
+ macros for which mmapped chunks should never be seen. This should
+ cause helpful core dumps to occur if it is tried by accident by
+ people extending or adapting this malloc.
+*/
+#define SIZE_BITS (PREV_INUSE|IS_MMAPPED)
+
+/* Get size, ignoring use bits */
+#define chunksize(p) ((p)->size & ~(SIZE_BITS))
+
+
+/* Ptr to next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~PREV_INUSE) ))
+
+/* Ptr to previous physical malloc_chunk */
+#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
+
+/* extract p's inuse bit */
+#define inuse(p)\
+((((mchunkptr)(((char*)(p))+((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE)
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p)\
+((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE
+
+#define clear_inuse(p)\
+((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE)
+
+
+/* check/set/clear inuse bits in known places */
+#define inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)
+
+#define set_inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)
+
+#define clear_inuse_bit_at_offset(p, s)\
+ (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s) ((p)->size = (s))
+
+/* Set size at footer (only when chunk is not in use) */
+#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
+
+
+/* -------------------- Internal data structures -------------------- */
+
+/*
+ Bins
+
+ An array of bin headers for free chunks. Each bin is doubly
+ linked. The bins are approximately proportionally (log) spaced.
+ There are a lot of these bins (128). This may look excessive, but
+ works very well in practice. Most bins hold sizes that are
+ unusual as malloc request sizes, but are more usual for fragments
+ and consolidated sets of chunks, which is what these bins hold, so
+ they can be found quickly. All procedures maintain the invariant
+ that no consolidated chunk physically borders another one, so each
+ chunk in a list is known to be preceeded and followed by either
+ inuse chunks or the ends of memory.
+
+ Chunks in bins are kept in size order, with ties going to the
+ approximately least recently used chunk. Ordering isn't needed
+ for the small bins, which all contain the same-sized chunks, but
+ facilitates best-fit allocation for larger chunks. These lists
+ are just sequential. Keeping them in order almost never requires
+ enough traversal to warrant using fancier ordered data
+ structures.
+
+ Chunks of the same size are linked with the most
+ recently freed at the front, and allocations are taken from the
+ back. This results in LRU (FIFO) allocation order, which tends
+ to give each chunk an equal opportunity to be consolidated with
+ adjacent freed chunks, resulting in larger free chunks and less
+ fragmentation.
+
+ To simplify use in double-linked lists, each bin header acts
+ as a malloc_chunk. This avoids special-casing for headers.
+ But to conserve space and improve locality, we allocate
+ only the fd/bk pointers of bins, and then use repositioning tricks
+ to treat these as the fields of a malloc_chunk*.
+*/
+
+typedef struct malloc_chunk* mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - ((sizeof(size_t))<<1)))
+
+/* analog of ++bin */
+#define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1)))
+
+/* Reminders about list directionality within bins */
+#define first(b) ((b)->fd)
+#define last(b) ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(P, BK, FD) { \
+ FD = P->fd; \
+ BK = P->bk; \
+ FD->bk = BK; \
+ BK->fd = FD; \
+}
+
+/*
+ Indexing
+
+ Bins for sizes < 512 bytes contain chunks of all the same size, spaced
+ 8 bytes apart. Larger bins are approximately logarithmically spaced:
+
+ 64 bins of size 8
+ 32 bins of size 64
+ 16 bins of size 512
+ 8 bins of size 4096
+ 4 bins of size 32768
+ 2 bins of size 262144
+ 1 bin of size what's left
+
+ The bins top out around 1MB because we expect to service large
+ requests via mmap.
+*/
+
+#define NBINS 96
+#define NSMALLBINS 32
+#define SMALLBIN_WIDTH 8
+#define MIN_LARGE_SIZE 256
+
+#define in_smallbin_range(sz) \
+ ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE)
+
+#define smallbin_index(sz) (((unsigned)(sz)) >> 3)
+
+#define bin_index(sz) \
+ ((in_smallbin_range(sz)) ? smallbin_index(sz) : __malloc_largebin_index(sz))
+
+/*
+ FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
+ first bin that is maintained in sorted order. This must
+ be the smallest size corresponding to a given bin.
+
+ Normally, this should be MIN_LARGE_SIZE. But you can weaken
+ best fit guarantees to sometimes speed up malloc by increasing value.
+ Doing this means that malloc may choose a chunk that is
+ non-best-fitting by up to the width of the bin.
+
+ Some useful cutoff values:
+ 512 - all bins sorted
+ 2560 - leaves bins <= 64 bytes wide unsorted
+ 12288 - leaves bins <= 512 bytes wide unsorted
+ 65536 - leaves bins <= 4096 bytes wide unsorted
+ 262144 - leaves bins <= 32768 bytes wide unsorted
+ -1 - no bins sorted (not recommended!)
+*/
+
+#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE
+/* #define FIRST_SORTED_BIN_SIZE 65536 */
+
+/*
+ Unsorted chunks
+
+ All remainders from chunk splits, as well as all returned chunks,
+ are first placed in the "unsorted" bin. They are then placed
+ in regular bins after malloc gives them ONE chance to be used before
+ binning. So, basically, the unsorted_chunks list acts as a queue,
+ with chunks being placed on it in free (and __malloc_consolidate),
+ and taken off (to be either used or placed in bins) in malloc.
+*/
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M) (bin_at(M, 1))
+
+/*
+ Top
+
+ The top-most available chunk (i.e., the one bordering the end of
+ available memory) is treated specially. It is never included in
+ any bin, is used only if no other chunk is available, and is
+ released back to the system if it is very large (see
+ M_TRIM_THRESHOLD). Because top initially
+ points to its own bin with initial zero size, thus forcing
+ extension on the first malloc request, we avoid having any special
+ code in malloc to check whether it even exists yet. But we still
+ need to do so when getting memory from system, so we make
+ initial_top treat the bin as a legal but unusable chunk during the
+ interval between initialization and the first call to
+ __malloc_alloc. (This is somewhat delicate, since it relies on
+ the 2 preceding words to be zero during this interval as well.)
+*/
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M) (unsorted_chunks(M))
+
+/*
+ Binmap
+
+ To help compensate for the large number of bins, a one-level index
+ structure is used for bin-by-bin searching. `binmap' is a
+ bitvector recording whether bins are definitely empty so they can
+ be skipped over during during traversals. The bits are NOT always
+ cleared as soon as bins are empty, but instead only
+ when they are noticed to be empty during traversal in malloc.
+*/
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT 5
+#define BITSPERMAP (1U << BINMAPSHIFT)
+#define BINMAPSIZE (NBINS / BITSPERMAP)
+
+#define idx2block(i) ((i) >> BINMAPSHIFT)
+#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))
+
+#define mark_bin(m,i) ((m)->binmap[idx2block(i)] |= idx2bit(i))
+#define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
+#define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i))
+
+/*
+ Fastbins
+
+ An array of lists holding recently freed small chunks. Fastbins
+ are not doubly linked. It is faster to single-link them, and
+ since chunks are never removed from the middles of these lists,
+ double linking is not necessary. Also, unlike regular bins, they
+ are not even processed in FIFO order (they use faster LIFO) since
+ ordering doesn't much matter in the transient contexts in which
+ fastbins are normally used.
+
+ Chunks in fastbins keep their inuse bit set, so they cannot
+ be consolidated with other free chunks. __malloc_consolidate
+ releases all chunks in fastbins and consolidates them with
+ other free chunks.
+*/
+
+typedef struct malloc_chunk* mfastbinptr;
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2)
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE 80
+
+#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1)
+
+/*
+ FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+ that triggers automatic consolidation of possibly-surrounding
+ fastbin chunks. This is a heuristic, so the exact value should not
+ matter too much. It is defined at half the default trim threshold as a
+ compromise heuristic to only attempt consolidation if it is likely
+ to lead to trimming. However, it is not dynamically tunable, since
+ consolidation reduces fragmentation surrounding loarge chunks even
+ if trimming is not used.
+*/
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD \
+ ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)
+
+/*
+ Since the lowest 2 bits in max_fast don't matter in size comparisons,
+ they are used as flags.
+*/
+
+/*
+ ANYCHUNKS_BIT held in max_fast indicates that there may be any
+ freed chunks at all. It is set true when entering a chunk into any
+ bin.
+*/
+
+#define ANYCHUNKS_BIT (1U)
+
+#define have_anychunks(M) (((M)->max_fast & ANYCHUNKS_BIT))
+#define set_anychunks(M) ((M)->max_fast |= ANYCHUNKS_BIT)
+#define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT)
+
+/*
+ FASTCHUNKS_BIT held in max_fast indicates that there are probably
+ some fastbin chunks. It is set true on entering a chunk into any
+ fastbin, and cleared only in __malloc_consolidate.
+*/
+
+#define FASTCHUNKS_BIT (2U)
+
+#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT))
+#define set_fastchunks(M) ((M)->max_fast |= (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
+#define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT))
+
+/* Set value of max_fast. Use impossibly small value if 0. */
+#define set_max_fast(M, s) \
+ (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \
+ ((M)->max_fast & (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
+
+#define get_max_fast(M) \
+ ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT))
+
+
+/*
+ morecore_properties is a status word holding dynamically discovered
+ or controlled properties of the morecore function
+*/
+
+#define MORECORE_CONTIGUOUS_BIT (1U)
+
+#define contiguous(M) \
+ (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT))
+#define noncontiguous(M) \
+ (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0)
+#define set_contiguous(M) \
+ ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT)
+#define set_noncontiguous(M) \
+ ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT)
+
+
+/*
+ ----------- Internal state representation and initialization -----------
+*/
+
+struct malloc_state {
+
+ /* The maximum chunk size to be eligible for fastbin */
+ size_t max_fast; /* low 2 bits used as flags */
+
+ /* Fastbins */
+ mfastbinptr fastbins[NFASTBINS];
+
+ /* Base of the topmost chunk -- not otherwise kept in a bin */
+ mchunkptr top;
+
+ /* The remainder from the most recent split of a small request */
+ mchunkptr last_remainder;
+
+ /* Normal bins packed as described above */
+ mchunkptr bins[NBINS * 2];
+
+ /* Bitmap of bins. Trailing zero map handles cases of largest binned size */
+ unsigned int binmap[BINMAPSIZE+1];
+
+ /* Tunable parameters */
+ unsigned long trim_threshold;
+ size_t top_pad;
+ size_t mmap_threshold;
+
+ /* Memory map support */
+ int n_mmaps;
+ int n_mmaps_max;
+ int max_n_mmaps;
+
+ /* Cache malloc_getpagesize */
+ unsigned int pagesize;
+
+ /* Track properties of MORECORE */
+ unsigned int morecore_properties;
+
+ /* Statistics */
+ size_t mmapped_mem;
+ size_t sbrked_mem;
+ size_t max_sbrked_mem;
+ size_t max_mmapped_mem;
+ size_t max_total_mem;
+};
+
+typedef struct malloc_state *mstate;
+
+/*
+ There is exactly one instance of this struct in this malloc.
+ If you are adapting this malloc in a way that does NOT use a static
+ malloc_state, you MUST explicitly zero-fill it before using. This
+ malloc relies on the property that malloc_state is initialized to
+ all zeroes (as is true of C statics).
+*/
+extern struct malloc_state __malloc_state; /* never directly referenced */
+
+/*
+ All uses of av_ are via get_malloc_state().
+ At most one "call" to get_malloc_state is made per invocation of
+ the public versions of malloc and free, but other routines
+ that in turn invoke malloc and/or free may call more then once.
+ Also, it is called in check* routines if __MALLOC_DEBUGGING is set.
+*/
+
+#define get_malloc_state() (&(__malloc_state))
+
+/* External internal utilities operating on mstates */
+void __malloc_consolidate(mstate);
+
+
+/* Debugging support */
+#if ! __MALLOC_DEBUGGING
+
+#define check_chunk(P)
+#define check_free_chunk(P)
+#define check_inuse_chunk(P)
+#define check_remalloced_chunk(P,N)
+#define check_malloced_chunk(P,N)
+#define check_malloc_state()
+#define assert(x) ((void)0)
+
+
+#else
+
+#define check_chunk(P) __do_check_chunk(P)
+#define check_free_chunk(P) __do_check_free_chunk(P)
+#define check_inuse_chunk(P) __do_check_inuse_chunk(P)
+#define check_remalloced_chunk(P,N) __do_check_remalloced_chunk(P,N)
+#define check_malloced_chunk(P,N) __do_check_malloced_chunk(P,N)
+#define check_malloc_state() __do_check_malloc_state()
+
+extern void __do_check_chunk(mchunkptr p);
+extern void __do_check_free_chunk(mchunkptr p);
+extern void __do_check_inuse_chunk(mchunkptr p);
+extern void __do_check_remalloced_chunk(mchunkptr p, size_t s);
+extern void __do_check_malloced_chunk(mchunkptr p, size_t s);
+extern void __do_check_malloc_state(void);
+
+#include <assert.h>
+
+#endif