From 744ad386ae9e980e03f6e4f5286ca5db25e52d5c Mon Sep 17 00:00:00 2001 From: Lukas Lorimer Date: Sun, 28 Apr 2019 23:47:19 -0400 Subject: [PATCH] Start writing a new malloc Passes all tests, but does not free anything yet --- .gitignore | 1 + include/unistd.h | 1 + scripts/malloc-config.py | 52 ++++++++ src/malloc.c | 251 ++++++++++++++++++++++++++++++++++++--- src/unistd.c | 9 ++ tests/malloc-test.c | 7 +- 6 files changed, 303 insertions(+), 18 deletions(-) create mode 100755 scripts/malloc-config.py diff --git a/.gitignore b/.gitignore index 34f2708..1d179c1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,6 @@ tests/bin tests/test-data/scratch tests/test-data/unreadable scripts/size-of.c +src/gen */*.o */*.a diff --git a/include/unistd.h b/include/unistd.h index 69e4257..27ee72b 100644 --- a/include/unistd.h +++ b/include/unistd.h @@ -9,6 +9,7 @@ typedef int uid_t; // TODO: Type declarations. +int brk(void *addr); void *sbrk(intptr_t increment); ssize_t read(int fd, void *buf, size_t count); ssize_t write(int fd, const void *buf, size_t count); diff --git a/scripts/malloc-config.py b/scripts/malloc-config.py new file mode 100755 index 0000000..02bdbda --- /dev/null +++ b/scripts/malloc-config.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +import os +import sys + +PAGE_SIZE = 4096 + +# TODO: Take from stdint. +PTR_SIZE = 8 + +def items_in_arena(item_size): + # We want to find out how many items can fit in this page, minus the header. + items_and_bitset_size = PAGE_SIZE - 2 * PTR_SIZE - 8 + + # There's probably a closed form formula for this, but it would get weird with + # the `ceil(i / 8)`. + # TODO: Is it worth moving this to a binary search? 
+ i = 0 + + while True: + if item_size * i + ((i + 7) // 8) > items_and_bitset_size: + break + i += 1 + return i - 1 + + +def main(): + page_size_log = PAGE_SIZE.bit_length() - 1 + if 1 << page_size_log != PAGE_SIZE: + print('Invalid page size, must be a power of 2') + sys.exit(1) + + nums = [items_in_arena(1 << k) for k in range(page_size_log)] + #efficiency = [(1 << s, ((1 << s) * n) / 4096) for s, n in enumerate(nums)] + + gen_folder = '../src/gen' + if not os.path.exists(gen_folder): + os.mkdir(gen_folder) + + with open(gen_folder + '/malloc-config.h', 'w+') as f: + f.write('// DO NOT EDIT!\n') + f.write('// THIS WAS GENERATED BY `malloc-config.py`\n\n') + + f.write('#pragma once\n\n') + + f.write('#define PAGE_SIZE {}U\n'.format(PAGE_SIZE)) + f.write('#define PAGE_SIZE_LOG_2 {}U\n\n'.format(page_size_log)) + + f.write('#define NUM_ITEMS {{{}}}\n'.format(', '.join(map(str, nums)))) + +if __name__ == '__main__': + main() diff --git a/src/malloc.c b/src/malloc.c index ac54d9b..c74a8a6 100644 --- a/src/malloc.c +++ b/src/malloc.c @@ -1,12 +1,68 @@ +#include +#include +#include #include #include +#include #include #include #include #include +#include "gen/malloc-config.h" + // A simple allocator which does not free anything. +// This allocator has a few goals: + +// 1) Avoid having a header for every small allocation. +// 2) Large allocations should be aligned to pages. + +// SMALL SIZES + +// For small sizes, items are allocated out of arenas. There is a list of +// arenas for each power of 2 between 1 and 2048(?) (inclusive). Allocation +// sizes are rounded up to the next highest power of 2. + +// Each arena is for a single allocation size. Each arena is PAGE_SIZE +// bytes long. +// The arena contains a header, followed by as many items of the given size +// that can fit in the arena. + +// header | used-bitset | (item 1) | (item 2) | ... 
+ +typedef struct small_arena_header { + struct small_arena_header *next_arena; + struct small_arena_header *prev_arena; + // The number of slots that are free in the structure. + uint16_t free_slots; + // The bitset byte that a search for an empty slot should be started at. + // If the first free slot is the 10th, then it doesn't make sense to look at + // slots 1-9 next time. + uint16_t start_search; + // The size of each item (stored as log 2). + uint8_t item_size; + // Padding, added by the compiler. + //uint8_t padding; + //uint64_t padding; +} small_arena_header; + +// If this line is updated, also update the malloc-config generator script. +static_assert(sizeof(small_arena_header) == sizeof(void*) * 2 + 8, "Invalid small_arena_header size"); + +static const uint16_t num_items[] = NUM_ITEMS; + +// The next arena pointer allows arenas to be held in a doubly linked list. +// Used bitset is a bitset, with a bit corresponding to each item in the arena. + +// There are 2 global circular doubly linked lists for each size of small arena. +// The first holds arenas that are full, the other contains arenas that are not +// full. + +// Sentinel list heads; linked to themselves by init_malloc(). +static small_arena_header full_small_arenas[PAGE_SIZE_LOG_2]; +static small_arena_header sparse_small_arenas[PAGE_SIZE_LOG_2]; + // This flag could also go before main() if that makes things easier. static volatile atomic_flag malloc_mtx_flag = ATOMIC_FLAG_INIT; static volatile bool malloc_mtx_init = false; @@ -26,11 +82,31 @@ static void init_malloc(void) { // Busy wait for flag... } - // We don't care if another thread already set the flag. + if (malloc_mtx_init) { + atomic_flag_clear(&malloc_mtx_flag); + return; + } + mtx_init(&malloc_mtx, mtx_plain); + + // Initialize small arena linked lists. + for (size_t i = 0; i < PAGE_SIZE_LOG_2; ++i) { + // TODO: Move to `sys/queue.h`? 
+ full_small_arenas[i].next_arena = &full_small_arenas[i]; + full_small_arenas[i].prev_arena = &full_small_arenas[i]; + sparse_small_arenas[i].next_arena = &sparse_small_arenas[i]; + sparse_small_arenas[i].prev_arena = &sparse_small_arenas[i]; + + full_small_arenas[i].free_slots = 1; + full_small_arenas[i].start_search = 0; + full_small_arenas[i].item_size = i; + sparse_small_arenas[i].free_slots = 1; + sparse_small_arenas[i].start_search = 0; + sparse_small_arenas[i].item_size = i; + } + // TODO: Do we need a barrier here? malloc_mtx_init = true; - atomic_flag_clear(&malloc_mtx_flag); } @@ -44,35 +120,171 @@ static void *get_data(malloc_metadata *meta) { return chars + sizeof(malloc_metadata); } -void *malloc(size_t size) { +static unsigned char *get_bitset(small_arena_header *header) { + return ((unsigned char *)header) + sizeof(small_arena_header); +} + +static unsigned char *get_items(small_arena_header *header) { + // The first item needs to be aligned. The easiest way to do this is to go + // from the top, then subtract all the items. + unsigned char * const last = ((unsigned char *)header) + PAGE_SIZE; + return last - ((1U << header->item_size) * num_items[header->item_size]); +} + +static void *sbrk_malloc(size_t size) { if (size == 0) { return NULL; } + const uintptr_t cur_break = (uintptr_t)sbrk(0); + const size_t front_padding = (PAGE_SIZE - (cur_break % PAGE_SIZE)) % PAGE_SIZE; + + if (brk((void*)(cur_break + front_padding + size)) == 0) { + return (void*)(cur_break + front_padding); + } + return NULL; +} + +static void sbrk_free(void *ptr) { + if (ptr == NULL) { + return; + } + // Nothing (for our simple allocation scheme). +} + +// Returns ceil(log2(n)). Doesn't work when n == 0. +static size_t log2_ceil(size_t n) { + assert(n != 0); + + // Subtract one first so that exact powers of 2 are not rounded up. + --n; + // Count the binary digits that remain (if any). 
+ size_t l = 0; + while (n != 0) { + n >>= 1U; + l++; + } + return l; +} + +void *malloc(size_t size) { + if (size > SIZE_MAX / 2) { + errno = ENOMEM; + return NULL; + } + init_malloc(); mtx_lock(&malloc_mtx); - void *ret = sbrk(size + sizeof(malloc_metadata)); - mtx_unlock(&malloc_mtx); + if (size > PAGE_SIZE / 2) { // No arena list exists for sizes above PAGE_SIZE / 2. + void *ptr = sbrk_malloc(size); + mtx_unlock(&malloc_mtx); + return ptr; + } - if (ret == (void *)-1) { - return NULL; + if (size == 0) { + // C++ requires an object of size 0 to be allocated. + size = 1; } - malloc_metadata *meta = (malloc_metadata *)ret; - meta->size = size; + const size_t log = log2_ceil(size); - return get_data(meta); + small_arena_header * const head = &sparse_small_arenas[log]; + + const size_t items = num_items[log]; + + if (head->next_arena == head) { + // Allocate new arena. + small_arena_header * const new_arena = sbrk_malloc(PAGE_SIZE); + if (new_arena == NULL) { + mtx_unlock(&malloc_mtx); + return NULL; + } + assert((uintptr_t)new_arena % PAGE_SIZE == 0); + new_arena->free_slots = items; + new_arena->start_search = 0; + new_arena->item_size = log; + { + // Clear bit set. + unsigned char *bitset = get_bitset(new_arena); + memset(bitset, 0, (items + 7) / 8); + } + + // Hook up new arena into sparse list. + new_arena->next_arena = head; + new_arena->prev_arena = head; + head->next_arena = new_arena; + head->prev_arena = new_arena; + } + + small_arena_header * const from = head->next_arena; + assert(from->free_slots > 0); + + // The first arena has a slot available. Find it. + // TODO: Search in bigger chunks. + unsigned char *bitset = get_bitset(from); + size_t cur = from->start_search; + size_t offset; + + for (;;) { + if (cur == items / 8) { + // In the last chunk, so some bits may not correspond to a slot. + const size_t num = items % 8; + offset = 0; + // Check each bit, up to the end. 
+ while (offset < num && (bitset[cur] & (1 << offset))) { + offset++; + } + if (offset < num) { + break; + } + cur = 0; + } else { + if (bitset[cur] != UCHAR_MAX) { + // There's a spot. Find it. + offset = 0; + while (bitset[cur] & (1 << offset)) { + offset++; + } + assert(offset <= 7); + break; + } + cur++; + } + } + + // Use the slot. + bitset[cur] |= 1U << offset; + from->free_slots--; + + if (from->free_slots == 0) { + from->next_arena->prev_arena = from->prev_arena; + from->prev_arena->next_arena = from->next_arena; + + // Move to end of full list. + small_arena_header * const newl = &full_small_arenas[log]; + from->next_arena = newl; + from->prev_arena = newl->prev_arena; + + newl->prev_arena = from; + from->prev_arena->next_arena = from; + } + + mtx_unlock(&malloc_mtx); + + return &get_items(from)[(cur * 8 + offset) * (1U << log)]; } void free(void *ptr) { - if (ptr == NULL) { - return; - } - // Nothing (for our simple allocation scheme). + // TODO + //sbrk_free(ptr); + //if (size >= PAGE_SIZE) { + // sbrk_free(ptr); + //} } void *realloc(void *ptr, size_t new_size) { + // TODO: Check if this is right for new_size == 0, or ptr == NULL. void *new_ptr = malloc(new_size); if (new_ptr == NULL) { return NULL; @@ -82,8 +294,15 @@ void *realloc(void *ptr, size_t new_size) { if (ptr == NULL) { return new_ptr; } - const malloc_metadata *old_meta = get_meta(ptr); - const size_t old_size = old_meta->size; + const uintptr_t ptr_int = (uintptr_t)ptr; + size_t old_size; + if (ptr_int % PAGE_SIZE == 0) { + // TODO: Support page-aligned (sbrk_malloc) allocations; new_ptr is leaked here. + return NULL; + } else { + small_arena_header * const chunk = (small_arena_header *)(ptr_int / PAGE_SIZE * PAGE_SIZE); + old_size = (1U << chunk->item_size); + } const size_t min_size = (old_size < new_size ? 
old_size : new_size); memcpy(new_ptr, ptr, min_size); diff --git a/src/unistd.c b/src/unistd.c index 681a7f0..fcbe907 100644 --- a/src/unistd.c +++ b/src/unistd.c @@ -6,6 +6,15 @@ #include "syscall.h" +int brk(void *addr) { + const uint64_t new_break = __syscall1((uint64_t)addr, SYS_BRK); + if ((uintptr_t)addr != (uintptr_t)new_break) { // On failure the kernel returns the unchanged break. + errno = ENOMEM; + return -1; + } + return 0; +} + void *sbrk(intptr_t increment) { const uint64_t cur_break = __syscall1(0, SYS_BRK); if (increment == 0) { diff --git a/tests/malloc-test.c b/tests/malloc-test.c index 33e8775..9b963d3 100644 --- a/tests/malloc-test.c +++ b/tests/malloc-test.c @@ -3,10 +3,13 @@ #include int main(void) { - assert(malloc(0) == NULL); - assert(realloc(NULL, 0) == NULL); free(NULL); + void *p = malloc(0); + free(p); + p = realloc(NULL, 0); + free(p); + const char *str = "abcdefg"; void *a = malloc(4); memcpy(a, str, 4);