diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 00db4a8ae1220..7923fbb2c1c24 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -189,7 +189,9 @@ struct Slab {
 
   // Get the number of bytes needed to contain the bitfield bits.
   constexpr static uint32_t bitfield_bytes(uint32_t chunk_size) {
-    return ((num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD) * 8;
+    // Round the byte count up to a multiple of MIN_ALIGNMENT + 1.
+    uint32_t words = (num_chunks(chunk_size) + BITS_IN_WORD - 1) / BITS_IN_WORD;
+    return __builtin_align_up(words * 8, MIN_ALIGNMENT + 1);
   }
 
   // The actual amount of memory available excluding the bitfield and metadata.
@@ -584,7 +586,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) {
 
   // If the requested alignment is less than what we already provide this is
   // just a normal allocation.
-  if (alignment < MIN_ALIGNMENT + 1)
+  if (alignment <= MIN_ALIGNMENT + 1)
     return gpu::allocate(size);
 
   // We can't handle alignments greater than 2MiB so we simply fail.
@@ -594,7 +596,7 @@ void *aligned_allocate(uint32_t alignment, uint64_t size) {
   // Trying to handle allocation internally would break the assumption that each
   // chunk is identical to eachother. Allocate enough memory with worst-case
   // alignment and then round up. The index logic will round down properly.
-  uint64_t rounded = size + alignment - 1;
+  uint64_t rounded = size + alignment - MIN_ALIGNMENT;
   void *ptr = gpu::allocate(rounded);
   return __builtin_align_up(ptr, alignment);
 }
diff --git a/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp b/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp
index b966e6953cc25..6e00eb86c680a 100644
--- a/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp
+++ b/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp
@@ -10,7 +10,7 @@ TEST_MAIN(int, char **, char **) {
   // aligned_alloc with valid alignment and size
   void *ptr = LIBC_NAMESPACE::aligned_alloc(32, 16);
   EXPECT_NE(ptr, nullptr);
-  EXPECT_EQ(__builtin_is_aligned(ptr, 32), 0U);
+  EXPECT_TRUE(__builtin_is_aligned(ptr, 32));
 
   LIBC_NAMESPACE::free(ptr);
 
@@ -23,7 +23,7 @@ TEST_MAIN(int, char **, char **) {
   void *div =
       LIBC_NAMESPACE::aligned_alloc(alignment, (gpu::get_thread_id() + 1) * 4);
   EXPECT_NE(div, nullptr);
-  EXPECT_EQ(__builtin_is_aligned(div, alignment), 0U);
+  EXPECT_TRUE(__builtin_is_aligned(div, alignment));
 
   return 0;
 }
diff --git a/libc/test/integration/src/stdlib/gpu/malloc.cpp b/libc/test/integration/src/stdlib/gpu/malloc.cpp
index 7880206b1aaaa..a02a6749258ca 100644
--- a/libc/test/integration/src/stdlib/gpu/malloc.cpp
+++ b/libc/test/integration/src/stdlib/gpu/malloc.cpp
@@ -24,6 +24,7 @@ TEST_MAIN(int, char **, char **) {
   int *divergent = reinterpret_cast<int *>(
       LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
   EXPECT_NE(divergent, nullptr);
+  EXPECT_TRUE(__builtin_is_aligned(divergent, 16));
   *divergent = 1;
   EXPECT_EQ(*divergent, 1);
   LIBC_NAMESPACE::free(divergent);