diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp index 00db4a8ae1220..7923fbb2c1c24 100644 --- a/libc/src/__support/GPU/allocator.cpp +++ b/libc/src/__support/GPU/allocator.cpp @@ -189,7 +189,9 @@ struct Slab { // Get the number of bytes needed to contain the bitfield bits. constexpr static uint32_t bitfield_bytes(uint32_t chunk_size) { - return ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8; + return __builtin_align_up( + ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8, + MIN_ALIGNMENT + 1); } // The actual amount of memory available excluding the bitfield and metadata. @@ -584,7 +586,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) { // If the requested alignment is less than what we already provide this is // just a normal allocation. - if (alignment < MIN_ALIGNMENT + 1) + if (alignment <= MIN_ALIGNMENT + 1) return gpu::allocate(size); // We can't handle alignments greater than 2MiB so we simply fail. @@ -594,7 +596,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) { // Trying to handle allocation internally would break the assumption that each // chunk is identical to eachother. Allocate enough memory with worst-case // alignment and then round up. The index logic will round down properly. 
- uint64_t rounded = size + alignment - 1; + uint64_t rounded = size + alignment - MIN_ALIGNMENT; void *ptr = gpu::allocate(rounded); return __builtin_align_up(ptr, alignment); } diff --git a/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp b/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp index b966e6953cc25..6e00eb86c680a 100644 --- a/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp +++ b/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp @@ -10,7 +10,7 @@ TEST_MAIN(int, char **, char **) { // aligned_alloc with valid alignment and size void *ptr = LIBC_NAMESPACE::aligned_alloc(32, 16); EXPECT_NE(ptr, nullptr); - EXPECT_EQ(__builtin_is_aligned(ptr, 32), 0U); + EXPECT_TRUE(__builtin_is_aligned(ptr, 32)); LIBC_NAMESPACE::free(ptr); @@ -23,7 +23,7 @@ TEST_MAIN(int, char **, char **) { void *div = LIBC_NAMESPACE::aligned_alloc(alignment, (gpu::get_thread_id() + 1) * 4); EXPECT_NE(div, nullptr); - EXPECT_EQ(__builtin_is_aligned(div, alignment), 0U); + EXPECT_TRUE(__builtin_is_aligned(div, alignment)); return 0; } diff --git a/libc/test/integration/src/stdlib/gpu/malloc.cpp b/libc/test/integration/src/stdlib/gpu/malloc.cpp index 7880206b1aaaa..a02a6749258ca 100644 --- a/libc/test/integration/src/stdlib/gpu/malloc.cpp +++ b/libc/test/integration/src/stdlib/gpu/malloc.cpp @@ -24,6 +24,7 @@ TEST_MAIN(int, char **, char **) { int *divergent = reinterpret_cast<int *>( LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16)); EXPECT_NE(divergent, nullptr); + EXPECT_TRUE(__builtin_is_aligned(divergent, 16)); *divergent = 1; EXPECT_EQ(*divergent, 1); LIBC_NAMESPACE::free(divergent);