diff --git a/include/sys/sys_heap.h b/include/sys/sys_heap.h new file mode 100644 index 0000000000000..a767d2bcdaad9 --- /dev/null +++ b/include/sys/sys_heap.h @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2019 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef ZEPHYR_INCLUDE_SYS_SYS_HEAP_H_ +#define ZEPHYR_INCLUDE_SYS_SYS_HEAP_H_ + +#include + +/* Simple, fast heap implementation. + * + * A more or less conventional segregated fit allocator with + * power-of-two buckets. + * + * Excellent space efficiency. Chunks can be split arbitrarily in 8 + * byte units. Overhead is only four bytes per allocated chunk (eight + * bytes for heaps >256kb or on 64 bit systems), plus a log2-sized + * array of 2-word bucket headers. No coarse alignment restrictions + * on blocks; they can be split and merged (in units of 8 bytes) + * arbitrarily. + * + * Simple API. Initialize at runtime with any blob of memory and not + * a macro-generated, carefully aligned static array. Allocate and + * free by user pointer and not an opaque block handle. + * + * Good fragmentation resistance. Freed blocks are always immediately + * merged with adjacent free blocks. Allocations are attempted from a + * sample of the smallest bucket that might fit, falling back rapidly + * to the smallest block guaranteed to fit. Split memory remaining in + * the chunk is always returned immediately to the heap for other + * allocation. + * + * Excellent performance with firmly bounded runtime. All operations + * are constant time (though there is a search of the smallest bucket + * that has a compile-time-configurable upper bound; setting this to + * extreme values results in an effectively linear search of the + * list), objectively fast (~hundred instructions) and amenable to + * locked operation. + */ + +/* Note: the init_mem/bytes fields are for the static initializer to + * have somewhere to put the arguments. The actual heap metadata at + * runtime lives in the heap memory itself and this struct simply + * functions as an opaque pointer. Would be good to clean this up and + * put the two values somewhere else, though it would make + * SYS_HEAP_DEFINE a little hairy to write. + */ +struct sys_heap { + struct z_heap *heap; + void *init_mem; + size_t init_bytes; +}; + +struct z_heap_stress_result { + u32_t total_allocs; + u32_t successful_allocs; + u32_t total_frees; + u64_t accumulated_in_use_bytes; +}; + +/** @brief Initialize sys_heap + * + * Initializes a sys_heap struct to manage the specified memory. + * + * @param h Heap to initialize + * @param mem Untyped pointer to unused memory + * @param bytes Size of region pointed to by @a mem + */ +void sys_heap_init(struct sys_heap *h, void *mem, size_t bytes); + +/** @brief Allocate memory from a sys_heap + * + * Returns a pointer to a block of unused memory in the heap. This + * memory will not otherwise be used until it is freed with + * sys_heap_free(). If no memory can be allocated, NULL will be + * returned. + * + * @note The sys_heap implementation is not internally synchronized. + * No two sys_heap functions should operate on the same heap at the + * same time. All locking must be provided by the user.
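+ * + * For illustration only (this wrapper is not part of the API, and the + * lock and heap names are hypothetical caller-owned objects), a + * thread-safe allocation using a Zephyr spinlock might look like: + * + *   k_spinlock_key_t key = k_spin_lock(&my_lock); + *   void *p = sys_heap_alloc(&my_heap, bytes); + *   k_spin_unlock(&my_lock, key);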
+ * + * @param h Heap from which to allocate + * @param bytes Number of bytes requested + * @return Pointer to memory the caller can now use + */ +void *sys_heap_alloc(struct sys_heap *h, size_t bytes); + +/** @brief Free memory into a sys_heap + * + * De-allocates a pointer to memory previously returned from + * sys_heap_alloc such that it can be used for other purposes. The + * caller must not use the memory region after entry to this function. + * + * @note The sys_heap implementation is not internally synchronized. + * No two sys_heap functions should operate on the same heap at the + * same time. All locking must be provided by the user. + * + * @param h Heap to which to return the memory + * @param mem A pointer previously returned from sys_heap_alloc() + */ +void sys_heap_free(struct sys_heap *h, void *mem); + +/** @brief Validate heap integrity + * + * Validates the internal integrity of a sys_heap. Intended for unit + * test and validation code, though potentially useful as a user API + * for applications with complicated runtime reliability requirements. + * Note: this cannot catch every possible error, but if it returns + * true then the heap is in a consistent state and can correctly + * handle any sys_heap_alloc() request and free any live pointer + * returned from a previous allocation. + * + * @param h Heap to validate + * @return true, if the heap is valid, otherwise false + */ +bool sys_heap_validate(struct sys_heap *h); + +/** @brief sys_heap stress test rig + * + * Test rig for heap allocation validation. This will loop for @a + * op_count cycles, in each iteration making a random choice to + * allocate or free a pointer of randomized (power law) size based on + * heuristics designed to keep the heap in a state where it is near @a + * target_percent full. Allocation and free operations are provided + * by the caller as callbacks (i.e. this can in theory test any heap). + * Results, including counts of frees and successful/unsuccessful + * allocations, are returned via the @a result struct. + * + * @param alloc Callback to perform an allocation. Passes back the @a + * arg parameter as a context handle. + * @param free Callback to perform a free of a pointer returned from + * @a alloc. Passes back the @a arg parameter as a + * context handle. + * @param arg Context handle to pass back to the callbacks + * @param total_bytes Size of the byte array the heap was initialized in + * @param op_count How many iterations to test + * @param scratch_mem A pointer to scratch memory to be used by the + * test. Should be about 1/2 the size of the heap + * for tests that need to stress fragmentation. + * @param scratch_bytes Size of the memory pointed to by @a scratch_mem + * @param target_percent Percentage fill value (1-100) to which the + * random allocation choices will seek. High + * values will result in significant allocation + * failures and a very fragmented heap. + * @param result Struct into which to store test results.
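+ * + * As a sketch (assuming the heap itself is passed as @a arg, the way the + * test code in tests/lib/heap does; the names my_alloc/my_free are + * illustrative), the callbacks can simply forward to the heap API: + * + *   void *my_alloc(void *arg, size_t bytes) { return sys_heap_alloc(arg, bytes); } + *   void my_free(void *arg, void *p) { sys_heap_free(arg, p); }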
+ */ +void sys_heap_stress(void *(*alloc)(void *arg, size_t bytes), + void (*free)(void *arg, void *p), + void *arg, size_t total_bytes, + u32_t op_count, + void *scratch_mem, size_t scratch_bytes, + int target_percent, + struct z_heap_stress_result *result); + +#endif /* ZEPHYR_INCLUDE_SYS_SYS_HEAP_H_ */ diff --git a/lib/os/CMakeLists.txt b/lib/os/CMakeLists.txt index 26c349ab9f9b5..3020eac292bd0 100644 --- a/lib/os/CMakeLists.txt +++ b/lib/os/CMakeLists.txt @@ -14,6 +14,8 @@ zephyr_sources( thread_entry.c timeutil.c work_q.c + heap.c + heap-validate.c ) zephyr_sources_ifdef(CONFIG_JSON_LIBRARY json.c) diff --git a/lib/os/Kconfig b/lib/os/Kconfig index b89ddd1e2ee7a..cfe136b369244 100644 --- a/lib/os/Kconfig +++ b/lib/os/Kconfig @@ -24,4 +24,29 @@ config BASE64 help Enable base64 encoding and decoding functionality +config SYS_HEAP_VALIDATE + bool "Enable internal heap validity checking" + help + The sys_heap implementation is instrumented for extensive + internal validation. Leave this off by default, unless + modifying the heap code or (maybe) when running in + environments that require sensitive detection of memory + corruption. + +config SYS_HEAP_ALLOC_LOOPS + int "Number of tries in the inner heap allocation loop" + default 3 + help + The sys_heap allocator bounds the number of tries from the + smallest chunk level (the one that might not fit the + requested allocation) to maintain constant time performance. + Setting this to a high level will cause the heap to return + more successful allocations in situations of high + fragmentation, at the cost of potentially significant + (linear time) searching of the free list. The default is + three, which results in an allocator with good statistical + properties ("most" allocations that fit will succeed) but + keeps the maximum runtime at a tight bound so that the heap + is useful in locked or ISR contexts. + endmenu diff --git a/lib/os/heap-validate.c b/lib/os/heap-validate.c new file mode 100644 index 0000000000000..afdbab0231ccf --- /dev/null +++ b/lib/os/heap-validate.c @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2019 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include "heap.h" + +/* White-box sys_heap validation code. Uses internal data structures. + * Not expected to be useful in production apps. This checks every + * header field of every chunk and returns true if the totality of the + * data structure is a valid heap. It doesn't necessarily tell you + * that it is the CORRECT heap given the history of alloc/free calls + * that it can't inspect. In a pathological case, you can imagine + * something scribbling a copy of a previously-valid heap on top of a + * running one and corrupting it. YMMV. + */ + +static size_t max_chunkid(struct z_heap *h) +{ + return h->len - bytes_to_chunksz(h, 1); +} + +static bool in_bounds(struct z_heap *h, chunkid_t c) +{ + return (c >= h->chunk0) + && (c <= max_chunkid(h)) + && (size(h, c) < h->len); +} + +static bool valid_chunk(struct z_heap *h, chunkid_t c) +{ + return (size(h, c) > 0 + && (c + size(h, c) <= h->len) + && in_bounds(h, c) + && ((c == h->chunk0) || in_bounds(h, c - left_size(h, c))) + && (used(h, c) || in_bounds(h, free_prev(h, c))) + && (used(h, c) || in_bounds(h, free_next(h, c)))); +} + +/* Validate multiple state dimensions for the bucket "next" pointer + * and see that they match. Probably should unify the design a + * bit... 
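+ * + * Concretely, the three "this bucket is empty" indicators checked below + * must agree: the bucket's bit in avail_buckets is clear, its next + * pointer is the null chunk (0), and its list_size is zero.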
+ */ +static inline void check_nexts(struct z_heap *h, int bidx) +{ + struct z_heap_bucket *b = &h->buckets[bidx]; + + bool emptybit = (h->avail_buckets & (1 << bidx)) == 0; + bool emptylist = b->next == 0; + bool emptycount = b->list_size == 0; + bool empties_match = emptybit == emptylist && emptybit == emptycount; + + (void)empties_match; + CHECK(empties_match); + + if (b->next != 0) { + CHECK(valid_chunk(h, b->next)); + } + + if (b->list_size == 2) { + CHECK(free_next(h, b->next) == free_prev(h, b->next)); + CHECK(free_next(h, b->next) != b->next); + } else if (b->list_size == 1) { + CHECK(free_next(h, b->next) == free_prev(h, b->next)); + CHECK(free_next(h, b->next) == b->next); + } +} + +bool sys_heap_validate(struct sys_heap *heap) +{ + struct z_heap *h = heap->heap; + chunkid_t c; + + /* Check the free lists: entry count should match, empty bit + * should be correct, and all chunk entries should point into + * valid unused chunks. Mark those chunks USED, temporarily. + */ + for (int b = 0; b <= bucket_idx(h, h->len); b++) { + chunkid_t c0 = h->buckets[b].next; + u32_t n = 0; + + check_nexts(h, b); + + for (c = c0; c != 0 && (n == 0 || c != c0); n++, c = free_next(h, c)) { + if (!valid_chunk(h, c)) { + return false; + } + chunk_set_used(h, c, true); + } + + bool empty = (h->avail_buckets & (1 << b)) == 0; + bool zero = n == 0; + + if (empty != zero) { + return false; + } + + if (empty && h->buckets[b].next != 0) { + return false; + } + + if (n != h->buckets[b].list_size) { + return false; + } + } + + /* Walk through the chunks linearly, verifying sizes and end + * pointer and that all chunks are now USED (i.e. all free + * blocks were found during enumeration). Mark all blocks + * UNUSED. + */ + size_t prev_size = 0; + + for (c = h->chunk0; c <= max_chunkid(h); c = right_chunk(h, c)) { + if (!valid_chunk(h, c)) { + return false; + } + if (!used(h, c)) { + return false; + } + + if (c != h->chunk0) { + if (left_size(h, c) != prev_size) { + return false; + } + } + prev_size = size(h, c); + + chunk_set_used(h, c, false); + } + if (c != h->len) { + return false; /* Should have exactly consumed the buffer */ + } + + /* Go through the free lists again checking that the linear + * pass caught all the blocks and that they now show UNUSED. + * Mark them USED. + */ + for (int b = 0; b <= bucket_idx(h, h->len); b++) { + chunkid_t c0 = h->buckets[b].next; + int n = 0; + + if (c0 == 0) { + continue; + } + + for (c = c0; n == 0 || c != c0; n++, c = free_next(h, c)) { + if (used(h, c)) { + return false; + } + chunk_set_used(h, c, true); + } + } + + /* Now we are valid, but have managed to invert all the in-use + * fields. One more linear pass to fix them up. + */ + for (c = h->chunk0; c <= max_chunkid(h); c = right_chunk(h, c)) { + chunk_set_used(h, c, !used(h, c)); + } + return true; +} + +struct z_heap_stress_rec { + void *(*alloc)(void *arg, size_t bytes); + void (*free)(void *arg, void *p); + void *arg; + size_t total_bytes; + struct z_heap_stress_block *blocks; + size_t nblocks; + size_t blocks_alloced; + size_t bytes_alloced; + u32_t target_percent; +}; + +struct z_heap_stress_block { + void *ptr; + size_t sz; +}; + +/* Very simple LCRNG (from https://nuclear.llnl.gov/CNP/rng/rngman/node4.html) + * + * Here to guarantee cross-platform test repeatability.
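+ * + * For reference, the update below is the 64-bit recurrence + * + *   state = state * 2862933555777941757 + 3037000493  (mod 2^64) + * + * with the high 32 bits of the state returned as each output value.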
+ */ +static u32_t rand32(void) +{ + static u64_t state = 123456789; /* seed */ + + state = state * 2862933555777941757UL + 3037000493UL; + + return (u32_t)(state >> 32); +} + +static bool rand_alloc_choice(struct z_heap_stress_rec *sr) +{ + /* Edge cases: no blocks allocated, and no space for a new one */ + if (sr->blocks_alloced == 0) { + return true; + } else if (sr->blocks_alloced >= sr->nblocks) { + return false; + } + + /* The way this works is to scale the chance of choosing to + * allocate vs. free such that it's even odds when the heap is + * at the target percent, with linear tapering on the low + * slope (i.e. we choose to always allocate with an empty + * heap, allocate 50% of the time when the heap is exactly at + * the target, and always free when above the target). For + * example, with a 50% target and the heap 25% full, + * free_chance works out to roughly 2^30, so we choose to + * allocate about 75% of the time. In + * practice, the operations aren't quite symmetric (you can + * always free, but your allocation might fail), and the units + * aren't matched (we're doing math based on bytes allocated + * and ignoring the overhead) but this is close enough. And + * yes, the math here is coarse (in units of percent), but + * that's good enough and fits well inside 32 bit quantities. + * (Note precision issue when heap size is above 40MB + * though!). + */ + __ASSERT(sr->total_bytes < 0xffffffffU / 100, "too big for u32!"); + u32_t full_pct = (100 * sr->bytes_alloced) / sr->total_bytes; + u32_t target = sr->target_percent ? sr->target_percent : 1; + u32_t free_chance = 0xffffffffU; + + if (full_pct < sr->target_percent) { + free_chance = full_pct * (0x80000000U / target); + } + + return rand32() > free_chance; +} + +/* Chooses a size of block to allocate, logarithmically favoring + * smaller blocks (i.e. blocks twice as large are half as frequent). + */ +static size_t rand_alloc_size(struct z_heap_stress_rec *sr) +{ + ARG_UNUSED(sr); + + /* Min scale of 4 means that half of the requests in the + * smallest size have an average size of 8 + */ + int scale = 4 + __builtin_clz(rand32()); + + return rand32() & ((1 << scale) - 1); +} + +/* Returns the index of a randomly chosen block to free */ +static size_t rand_free_choice(struct z_heap_stress_rec *sr) +{ + return rand32() % sr->blocks_alloced; +} + +/* General purpose heap stress test. Takes function pointers to allow + * for testing multiple heap APIs with the same rig. The alloc and + * free functions are passed back the argument as a context pointer. + * The total_bytes argument should reflect the size of the heap being + * tested. The scratch array is used to store temporary state and + * should be sized about half as large as the heap itself. Results + * are returned through the caller-provided result struct.
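+ * + * (One way to read the results, as the test code in tests/lib/heap does, + * is to divide accumulated_in_use_bytes by the number of operations + * performed to get the average number of bytes in use over the run.)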
+ */ +void sys_heap_stress(void *(*alloc)(void *arg, size_t bytes), + void (*free)(void *arg, void *p), + void *arg, size_t total_bytes, + u32_t op_count, + void *scratch_mem, size_t scratch_bytes, + int target_percent, + struct z_heap_stress_result *result) +{ + struct z_heap_stress_rec sr = { + .alloc = alloc, + .free = free, + .arg = arg, + .total_bytes = total_bytes, + .blocks = scratch_mem, + .nblocks = scratch_bytes / sizeof(struct z_heap_stress_block), + .target_percent = target_percent, + }; + + *result = (struct z_heap_stress_result) {0}; + + for (u32_t i = 0; i < op_count; i++) { + if (rand_alloc_choice(&sr)) { + size_t sz = rand_alloc_size(&sr); + void *p = sr.alloc(sr.arg, sz); + + result->total_allocs++; + if (p != NULL) { + result->successful_allocs++; + sr.blocks[sr.blocks_alloced].ptr = p; + sr.blocks[sr.blocks_alloced].sz = sz; + sr.blocks_alloced++; + sr.bytes_alloced += sz; + } + } else { + int b = rand_free_choice(&sr); + void *p = sr.blocks[b].ptr; + size_t sz = sr.blocks[b].sz; + + result->total_frees++; + sr.blocks[b] = sr.blocks[sr.blocks_alloced - 1]; + sr.blocks_alloced--; + sr.bytes_alloced -= sz; + sr.free(sr.arg, p); + } + result->accumulated_in_use_bytes += sr.bytes_alloced; + } +} diff --git a/lib/os/heap.c b/lib/os/heap.c new file mode 100644 index 0000000000000..86f26b39f8d14 --- /dev/null +++ b/lib/os/heap.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2019 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include "heap.h" + +static void *chunk_mem(struct z_heap *h, chunkid_t c) +{ + u8_t *ret = ((u8_t *)&h->buf[c]) + chunk_header_bytes(h); + + CHECK(!(((size_t)ret) & (big_heap(h) ? 7 : 3))); + + return ret; +} + +static void free_list_remove(struct z_heap *h, int bidx, + chunkid_t c) +{ + struct z_heap_bucket *b = &h->buckets[bidx]; + + CHECK(!used(h, c)); + CHECK(b->next != 0); + CHECK(b->list_size > 0); + CHECK((((h->avail_buckets & (1 << bidx)) == 0) + == (h->buckets[bidx].next == 0))); + + b->list_size--; + + if (b->list_size == 0) { + h->avail_buckets &= ~(1 << bidx); + b->next = 0; + } else { + chunkid_t first = free_prev(h, c), second = free_next(h, c); + + b->next = second; + chunk_set(h, first, FREE_NEXT, second); + chunk_set(h, second, FREE_PREV, first); + } +} + +static void free_list_add(struct z_heap *h, chunkid_t c) +{ + int b = bucket_idx(h, size(h, c)); + + if (h->buckets[b].list_size++ == 0) { + CHECK(h->buckets[b].next == 0); + CHECK((h->avail_buckets & (1 << b)) == 0); + + /* Empty list, first item */ + h->avail_buckets |= (1 << b); + h->buckets[b].next = c; + chunk_set(h, c, FREE_PREV, c); + chunk_set(h, c, FREE_NEXT, c); + } else { + /* Insert before (!) 
the "next" pointer */ + chunkid_t second = h->buckets[b].next; + chunkid_t first = free_prev(h, second); + + chunk_set(h, c, FREE_PREV, first); + chunk_set(h, c, FREE_NEXT, second); + chunk_set(h, first, FREE_NEXT, c); + chunk_set(h, second, FREE_PREV, c); + } + + CHECK(h->avail_buckets & (1 << bucket_idx(h, size(h, c)))); +} + +static ALWAYS_INLINE bool last_chunk(struct z_heap *h, chunkid_t c) +{ + return (c + size(h, c)) == h->len; +} + +/* Allocates (fit check has already been performed) from the next + * chunk at the specified bucket level + */ +static void *split_alloc(struct z_heap *h, int bidx, size_t sz) +{ + CHECK(h->buckets[bidx].next != 0 + && sz <= size(h, h->buckets[bidx].next)); + + chunkid_t c = h->buckets[bidx].next; + + free_list_remove(h, bidx, c); + + /* Split off remainder if it's usefully large */ + size_t rem = size(h, c) - sz; + + CHECK(rem < h->len); + + if (rem >= (big_heap(h) ? 2 : 1)) { + chunkid_t c2 = c + sz; + chunkid_t c3 = right_chunk(h, c); + + chunk_set(h, c, SIZE_AND_USED, sz); + chunk_set(h, c2, SIZE_AND_USED, rem); + chunk_set(h, c2, LEFT_SIZE, sz); + if (!last_chunk(h, c2)) { + chunk_set(h, c3, LEFT_SIZE, rem); + } + free_list_add(h, c2); + } + + chunk_set_used(h, c, true); + + return chunk_mem(h, c); +} + +void sys_heap_free(struct sys_heap *heap, void *mem) +{ + if (mem == NULL) { + return; /* ISO C free() semantics */ + } + + struct z_heap *h = heap->heap; + chunkid_t c = ((u8_t *)mem - chunk_header_bytes(h) + - (u8_t *)h->buf) / CHUNK_UNIT; + + /* Merge with right chunk? We can just absorb it. */ + if (!last_chunk(h, c) && !used(h, right_chunk(h, c))) { + chunkid_t rc = right_chunk(h, c); + size_t newsz = size(h, c) + size(h, rc); + + free_list_remove(h, bucket_idx(h, size(h, rc)), rc); + chunk_set(h, c, SIZE_AND_USED, newsz); + if (!last_chunk(h, c)) { + chunk_set(h, right_chunk(h, c), LEFT_SIZE, newsz); + } + } + + /* Merge with left chunk? It absorbs us. */ + if (c != h->chunk0 && !used(h, left_chunk(h, c))) { + chunkid_t lc = left_chunk(h, c); + chunkid_t rc = right_chunk(h, c); + size_t csz = size(h, c); + size_t merged_sz = csz + size(h, lc); + + free_list_remove(h, bucket_idx(h, size(h, lc)), lc); + chunk_set(h, lc, SIZE_AND_USED, merged_sz); + if (!last_chunk(h, lc)) { + chunk_set(h, rc, LEFT_SIZE, merged_sz); + } + + c = lc; + } + + chunk_set_used(h, c, false); + free_list_add(h, c); +} + +void *sys_heap_alloc(struct sys_heap *heap, size_t bytes) +{ + struct z_heap *h = heap->heap; + size_t sz = bytes_to_chunksz(h, bytes); + int bi = bucket_idx(h, sz); + struct z_heap_bucket *b = &h->buckets[bi]; + + if (bytes == 0 || bi > bucket_idx(h, h->len)) { + return NULL; + } + + /* First try a bounded count of items from the minimal bucket + * size. These may not fit, but trying (e.g.) three means that + * (assuming that chunk sizes are evenly distributed[1]) we + * have a 7/8 chance of finding a match, thus keeping the + * number of such blocks consumed by allocation higher than + * the number of smaller blocks created by fragmenting larger + * ones. + * + * [1] In practice, they are never evenly distributed, of + * course. But even in pathological situations we still + * maintain our constant time performance and at worst see + * fragmentation waste of the order of the block allocated + * only.
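+ * + * (The fallback path below masks off the avail_buckets bits at or + * below bi, so any remaining set bit names a bucket whose smallest + * member is guaranteed to be large enough for the request.)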
+ */ + int loops = MIN(b->list_size, CONFIG_SYS_HEAP_ALLOC_LOOPS); + + for (int i = 0; i < loops; i++) { + CHECK(b->next != 0); + if (size(h, b->next) >= sz) { + return split_alloc(h, bi, sz); + } else { + b->next = free_next(h, b->next); + } + } + + /* Otherwise pick the smallest non-empty bucket guaranteed to + * fit and use that unconditionally. + */ + size_t bmask = h->avail_buckets & ~((1 << (bi + 1)) - 1); + + if ((bmask & h->avail_buckets) != 0) { + int minbucket = __builtin_ctz(bmask & h->avail_buckets); + + return split_alloc(h, minbucket, sz); + } + + return NULL; +} + +void sys_heap_init(struct sys_heap *heap, void *mem, size_t bytes) +{ + /* Must fit in a 32 bit count of u64's */ +#if __SIZEOF_SIZE_T__ > 4 + CHECK(bytes < 0x800000000ULL); +#endif + + /* Round the start up, the end down */ + size_t addr = ((size_t)mem + CHUNK_UNIT - 1) & ~(CHUNK_UNIT - 1); + size_t end = ((size_t)mem + bytes) & ~(CHUNK_UNIT - 1); + size_t buf_sz = (end - addr) / CHUNK_UNIT; + size_t hdr_chunks = chunksz(sizeof(struct z_heap)); + + CHECK(end > addr); + + struct z_heap *h = (struct z_heap *)addr; + + heap->heap = (struct z_heap *)addr; + h->buf = (u64_t *)addr; + h->buckets = (void *)(addr + CHUNK_UNIT * hdr_chunks); + h->len = buf_sz; + h->size_mask = (1 << (big_heap(h) ? 31 : 15)) - 1; + h->avail_buckets = 0; + + size_t buckets_bytes = ((bucket_idx(h, buf_sz) + 1) + * sizeof(struct z_heap_bucket)); + + h->chunk0 = hdr_chunks + chunksz(buckets_bytes); + + for (int i = 0; i <= bucket_idx(heap->heap, heap->heap->len); i++) { + heap->heap->buckets[i].list_size = 0; + heap->heap->buckets[i].next = 0; + } + + chunk_set(h, h->chunk0, SIZE_AND_USED, buf_sz - h->chunk0); + free_list_add(h, h->chunk0); +} diff --git a/lib/os/heap.h b/lib/os/heap.h new file mode 100644 index 0000000000000..9d07b0351600a --- /dev/null +++ b/lib/os/heap.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2019 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef ZEPHYR_INCLUDE_LIB_OS_HEAP_H_ +#define ZEPHYR_INCLUDE_LIB_OS_HEAP_H_ + +/* + * Internal heap APIs + */ + +/* These validation checks are non-trivially expensive, so enable + * only when debugging the heap code. They shouldn't be routine + * assertions. + */ +#ifdef CONFIG_SYS_HEAP_VALIDATE +#define CHECK(x) __ASSERT(x, "") +#else +#define CHECK(x) /**/ +#endif + +/* Chunks are identified by their offset in 8 byte units from the + * first address in the buffer (a zero-valued chunkid_t is used as a + * null; that chunk would always point into the metadata at the start + * of the heap and cannot be allocated). They are prefixed by a + * variable size header that depends on the size of the heap. Heaps + * with fewer than 2^15 units (256kb) of storage use shorts to store + * the fields, otherwise the units are 32 bit integers for a 16GB heap + * space (larger spaces really aren't in scope for this code, but + * could be handled similarly I suppose). Because of that design + * there's a certain amount of boilerplate API needed to expose the + * field accessors since we can't use natural syntax. + * + * The fields are: + * SIZE_AND_USED: the total size (including header) of the chunk in + * 8-byte units. The top bit stores a "used" flag. + * LEFT_SIZE: The size of the left (next lower chunk in memory) + * neighbor chunk. + * FREE_PREV: Chunk ID of the previous node in a free list. + * FREE_NEXT: Chunk ID of the next node in a free list. + * + * The free lists are circular lists, one for each power-of-two size + * category.
The free list pointers exist only for free chunks, + * obviously. This memory is part of the user's buffer when + * allocated. + */ +typedef size_t chunkid_t; + +#define CHUNK_UNIT 8 + +enum chunk_fields { SIZE_AND_USED, LEFT_SIZE, FREE_PREV, FREE_NEXT }; + +struct z_heap { + u64_t *buf; + struct z_heap_bucket *buckets; + u32_t len; + u32_t size_mask; + u32_t chunk0; + u32_t avail_buckets; +}; + +struct z_heap_bucket { + chunkid_t next; + size_t list_size; +}; + +static inline bool big_heap(struct z_heap *h) +{ + return sizeof(size_t) > 4 || h->len > 0x7fff; +} + +static inline size_t chunk_field(struct z_heap *h, chunkid_t c, + enum chunk_fields f) +{ + void *cmem = &h->buf[c]; + + if (big_heap(h)) { + return ((u32_t *)cmem)[f]; + } else { + return ((u16_t *)cmem)[f]; + } +} + +static inline void chunk_set(struct z_heap *h, chunkid_t c, + enum chunk_fields f, chunkid_t val) +{ + CHECK(c >= h->chunk0 && c < h->len); + CHECK((val & ~((h->size_mask << 1) + 1)) == 0); + CHECK((val & h->size_mask) < h->len); + + void *cmem = &h->buf[c]; + + if (big_heap(h)) { + ((u32_t *)cmem)[f] = (u32_t) val; + } else { + ((u16_t *)cmem)[f] = (u16_t) val; + } +} + +static inline chunkid_t used(struct z_heap *h, chunkid_t c) +{ + return (chunk_field(h, c, SIZE_AND_USED) & ~h->size_mask) != 0; +} + +static ALWAYS_INLINE chunkid_t size(struct z_heap *h, chunkid_t c) +{ + return chunk_field(h, c, SIZE_AND_USED) & h->size_mask; +} + +static inline void chunk_set_used(struct z_heap *h, chunkid_t c, + bool used) +{ + chunk_set(h, c, SIZE_AND_USED, + size(h, c) | (used ? (h->size_mask + 1) : 0)); +} + +static inline chunkid_t left_size(struct z_heap *h, chunkid_t c) +{ + return chunk_field(h, c, LEFT_SIZE); +} + +static inline chunkid_t free_prev(struct z_heap *h, chunkid_t c) +{ + return chunk_field(h, c, FREE_PREV); +} + +static inline chunkid_t free_next(struct z_heap *h, chunkid_t c) +{ + return chunk_field(h, c, FREE_NEXT); +} + +static inline chunkid_t left_chunk(struct z_heap *h, chunkid_t c) +{ + return c - left_size(h, c); +} + +static inline chunkid_t right_chunk(struct z_heap *h, chunkid_t c) +{ + return c + size(h, c); +} + +static inline size_t chunk_header_bytes(struct z_heap *h) +{ + return big_heap(h) ? 8 : 4; +} + +static inline size_t chunksz(size_t bytes) +{ + return (bytes + CHUNK_UNIT - 1) / CHUNK_UNIT; +} + +static inline size_t bytes_to_chunksz(struct z_heap *h, size_t bytes) +{ + return chunksz(chunk_header_bytes(h) + bytes); +} + +static int bucket_idx(struct z_heap *h, size_t sz) +{ + /* A chunk of size 2 is the minimum size on big heaps */ + return 31 - __builtin_clz(sz) - (big_heap(h) ? 
1 : 0); +} + +#endif /* ZEPHYR_INCLUDE_LIB_OS_HEAP_H_ */ diff --git a/tests/lib/heap/CMakeLists.txt b/tests/lib/heap/CMakeLists.txt new file mode 100644 index 0000000000000..7fbd555f16de5 --- /dev/null +++ b/tests/lib/heap/CMakeLists.txt @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.13.1) +include($ENV{ZEPHYR_BASE}/cmake/app/boilerplate.cmake NO_POLICY_SCOPE) +project(heap) + +FILE(GLOB app_sources src/*.c) +target_sources(app PRIVATE ${app_sources}) diff --git a/tests/lib/heap/prj.conf b/tests/lib/heap/prj.conf new file mode 100644 index 0000000000000..a41fd3e078889 --- /dev/null +++ b/tests/lib/heap/prj.conf @@ -0,0 +1,2 @@ +CONFIG_ZTEST=y +CONFIG_SYS_HEAP_VALIDATE=y diff --git a/tests/lib/heap/src/main.c b/tests/lib/heap/src/main.c new file mode 100644 index 0000000000000..53e1218716f73 --- /dev/null +++ b/tests/lib/heap/src/main.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2019 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include + +/* This is sort of a mess to detect, but basically MEMSZ becomes the minimum + * value specified by either of the two configuration mechanisms for + * system RAM size, or else INT_MAX if neither is specified (this is + * true right now only for native_posix, where a 256k array will have + * no trouble). + */ + +#ifdef DT_SRAM_SIZE +# define SZ1 DT_SRAM_SIZE +#else +# define SZ1 INT_MAX +#endif + +#ifdef CONFIG_SRAM_SIZE +# define SZ2 CONFIG_SRAM_SIZE +#else +# define SZ2 INT_MAX +#endif + +#define MEMSZ MIN(SZ1, SZ2) + +#define BIG_HEAP_SZ MIN(256 * 1024, MEMSZ / 2) +#define SMALL_HEAP_SZ 2048 + +char heapmem[BIG_HEAP_SZ]; + +/* How many alloc/free operations are tested on each heap. Two per + * byte of heap sounds about right to get exhaustive coverage without + * blowing too many cycles. + */ +#define ITERATION_COUNT (2 * SMALL_HEAP_SZ) + +char scratchmem[sizeof(heapmem) / 2]; + +/* Simple dumb hash function of the size and address */ +static size_t fill_token(void *p, size_t sz) +{ + size_t pi = (size_t) p; + + return (pi * sz) ^ ((sz ^ 0xea6d) * ((pi << 11) | (pi >> 21))); +} + +/* Puts markers at the start and end of a block to ensure that nothing + * has scribbled on it while it was allocated. The first word is the + * block size.
The second and last (if they fit) are a hashed "fill * token" + */ +static void fill_block(void *p, size_t sz) +{ + if (p == NULL) { + return; + } + + size_t tok = fill_token(p, sz); + + ((size_t *)p)[0] = sz; + + if (sz >= 2 * sizeof(size_t)) { + ((size_t *)p)[1] = tok; + } + + if (sz > 3 * sizeof(size_t)) { + ((size_t *)p)[sz / sizeof(size_t) - 1] = tok; + } +} + +/* Checks markers just before freeing a block */ +static void check_fill(void *p) +{ + size_t sz = ((size_t *)p)[0]; + size_t tok = fill_token(p, sz); + + zassert_true(sz > 0, ""); + + if (sz >= 2 * sizeof(size_t)) { + zassert_true(((size_t *)p)[1] == tok, ""); + } + + if (sz > 3 * sizeof(size_t)) { + zassert_true(((size_t *)p)[sz / sizeof(size_t) - 1] == tok, ""); + } +} + +void *testalloc(void *arg, size_t bytes) +{ + void *ret = sys_heap_alloc(arg, bytes); + + fill_block(ret, bytes); + sys_heap_validate(arg); + return ret; +} + +void testfree(void *arg, void *p) +{ + check_fill(p); + sys_heap_free(arg, p); + sys_heap_validate(arg); +} + +static void log_result(u32_t sz, struct z_heap_stress_result *r) +{ + u32_t tot = r->total_allocs + r->total_frees; + u32_t avg = (u32_t)((r->accumulated_in_use_bytes + tot/2) / tot); + u32_t avg_pct = (u32_t)(100ULL * avg + sz / 2) / sz; + u32_t succ_pct = ((100ULL * r->successful_allocs + r->total_allocs / 2) + / r->total_allocs); + + TC_PRINT("successful allocs: %d/%d (%d%%), frees: %d," + " avg usage: %d/%d (%d%%)\n", + r->successful_allocs, r->total_allocs, succ_pct, + r->total_frees, avg, sz, avg_pct); +} + +/* Do a heavy test over a small heap, with many iterations that need + * to reuse memory repeatedly. Target 50% fill, as that setting tends + * to prevent runaway fragmentation and most allocations continue to + * succeed in steady state. + */ +static void test_small_heap(void) +{ + struct sys_heap heap; + struct z_heap_stress_result result; + + TC_PRINT("Testing small (%d byte) heap\n", SMALL_HEAP_SZ); + + sys_heap_init(&heap, heapmem, SMALL_HEAP_SZ); + zassert_true(sys_heap_validate(&heap), ""); + sys_heap_stress(testalloc, testfree, &heap, + SMALL_HEAP_SZ, ITERATION_COUNT, + scratchmem, sizeof(scratchmem), + 50, &result); + + log_result(SMALL_HEAP_SZ, &result); +} + +/* Very similar, but tests a fragmentation runaway scenario where we + * target 100% fill and end up breaking memory up into maximally + * fragmented blocks (i.e. small allocations always grab and split the + * bigger chunks). Obviously success rates in alloc will be very low, + * but consistency should still be maintained. Paradoxically, fill + * level is not much better than the 50% target due to all the + * fragmentation overhead (also the way we do accounting: we are + * counting bytes requested, so if you ask for a 3 byte block and + * receive an 8 byte minimal chunk, we still count that as 5 bytes of + * waste). + */ +static void test_fragmentation(void) +{ + struct sys_heap heap; + struct z_heap_stress_result result; + + TC_PRINT("Testing maximally fragmented (%d byte) heap\n", SMALL_HEAP_SZ); + + sys_heap_init(&heap, heapmem, SMALL_HEAP_SZ); + zassert_true(sys_heap_validate(&heap), ""); + sys_heap_stress(testalloc, testfree, &heap, + SMALL_HEAP_SZ, ITERATION_COUNT, + scratchmem, sizeof(scratchmem), + 100, &result); + + log_result(SMALL_HEAP_SZ, &result); +} + +/* The heap block format changes for heaps with more than 2^15 chunks, + * so test that case too. This can be too large to iterate over + * exhaustively with good performance, so the relative operation count + * and fragmentation are going to be lower.
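+ * + * (At the full 256 kB BIG_HEAP_SZ, the 8-byte chunk units work out to + * 32768 = 2^15 chunks, right around the small/big format boundary.)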
+ */ +static void test_big_heap(void) +{ + struct sys_heap heap; + struct z_heap_stress_result result; + + TC_PRINT("Testing big (%d byte) heap\n", BIG_HEAP_SZ); + + sys_heap_init(&heap, heapmem, BIG_HEAP_SZ); + zassert_true(sys_heap_validate(&heap), ""); + sys_heap_stress(testalloc, testfree, &heap, + BIG_HEAP_SZ, ITERATION_COUNT, + scratchmem, sizeof(scratchmem), + 100, &result); + + log_result(BIG_HEAP_SZ, &result); +} + +void test_main(void) +{ + ztest_test_suite(lib_heap_test, + ztest_unit_test(test_small_heap), + ztest_unit_test(test_fragmentation), + ztest_unit_test(test_big_heap) + ); + + ztest_run_test_suite(lib_heap_test); +} diff --git a/tests/lib/heap/testcase.yaml b/tests/lib/heap/testcase.yaml new file mode 100644 index 0000000000000..9172d1ed76774 --- /dev/null +++ b/tests/lib/heap/testcase.yaml @@ -0,0 +1,3 @@ +tests: + lib.heap: + tags: heap