diff --git a/Cargo.lock b/Cargo.lock index 361c31d38b..efb26eedee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -55,6 +55,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-activity" version = "0.5.2" @@ -1808,35 +1814,18 @@ dependencies = [ "gl_generator", ] -[[package]] -name = "gpu-alloc" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" -dependencies = [ - "bitflags 2.10.0", - "gpu-alloc-types", -] - -[[package]] -name = "gpu-alloc-types" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" -dependencies = [ - "bitflags 2.10.0", -] - [[package]] name = "gpu-allocator" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c151a2a5ef800297b4e79efa4f4bec035c5f51d5ae587287c9b952bdf734cacd" +checksum = "51255ea7cfaadb6c5f1528d43e92a82acb2b96c43365989a28b2d44ee38f8795" dependencies = [ + "ash", + "hashbrown 0.16.0", "log", "presser", - "thiserror 1.0.69", - "windows 0.58.0", + "thiserror 2.0.17", + "windows 0.61.3", ] [[package]] @@ -1898,6 +1887,8 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" dependencies = [ + "allocator-api2", + "equivalent", "foldhash 0.2.0", "serde", ] @@ -4944,7 +4935,6 @@ dependencies = [ "glutin", "glutin-winit", "glutin_wgl_sys 0.6.1", - "gpu-alloc", "gpu-allocator", "gpu-descriptor", "hashbrown 0.16.0", diff --git a/Cargo.toml b/Cargo.toml index 12f94bfed0..b1d4276b90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -207,15 +207,18 @@ objc = "0.2.5" # Vulkan dependencies android_system_properties = "0.1.1" ash = "0.38" -gpu-alloc = "0.6" gpu-descriptor = "0.3.2" # DX12 dependencies -gpu-allocator = { version = "0.27", default-features = false } range-alloc = "0.1" mach-dxcompiler-rs = { version = "0.1.4", default-features = false } # remember to increase max_shader_model if applicable windows-core = { version = "0.58", default-features = false } +# DX12 and Vulkan dependencies +gpu-allocator = { version = "0.28.0", default-features = false, features = [ + "hashbrown", +] } + # Gles dependencies khronos-egl = "6" glow = "0.16" diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index bb300f6107..88f4b0625e 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -91,7 +91,6 @@ vulkan = [ "dep:arrayvec", "dep:ash", "dep:bytemuck", - "dep:gpu-alloc", "dep:gpu-descriptor", "dep:hashbrown", "dep:libc", @@ -102,6 +101,7 @@ vulkan = [ "dep:profiling", "dep:smallvec", "dep:windows", + "gpu-allocator/vulkan", "windows/Win32", ] gles = [ @@ -142,7 +142,7 @@ dx12 = [ "dep:profiling", "dep:range-alloc", "dep:windows-core", - "dep:gpu-allocator", + "gpu-allocator/d3d12", "windows/Win32_Graphics_Direct3D_Fxc", "windows/Win32_Graphics_Direct3D_Dxc", "windows/Win32_Graphics_Direct3D", @@ -221,9 +221,10 @@ glow = { workspace = true, optional = true } ######################## [target.'cfg(not(target_arch = "wasm32"))'.dependencies] +# Backend: Vulkan and Dx12 +gpu-allocator = { workspace = true, optional = true } # Backend: Vulkan ash = { workspace = true, optional = 
true }
-gpu-alloc = { workspace = true, optional = true }
 gpu-descriptor = { workspace = true, optional = true }
 smallvec = { workspace = true, optional = true, features = ["union"] }
 # Backend: GLES
@@ -250,7 +251,6 @@ windows-core = { workspace = true, optional = true }
 # Backend: Dx12
 bit-set = { workspace = true, optional = true }
 range-alloc = { workspace = true, optional = true }
-gpu-allocator = { workspace = true, optional = true, features = ["d3d12"] }
 once_cell = { workspace = true, optional = true }
 # backend: GLES
 glutin_wgl_sys = { workspace = true, optional = true }
diff --git a/wgpu-hal/src/dx12/suballocation.rs b/wgpu-hal/src/dx12/suballocation.rs
index 8cee410230..4a3afedf3f 100644
--- a/wgpu-hal/src/dx12/suballocation.rs
+++ b/wgpu-hal/src/dx12/suballocation.rs
@@ -143,7 +143,7 @@ impl Allocator {
             allocations,
             blocks,
             total_allocated_bytes: upstream.total_allocated_bytes,
-            total_reserved_bytes: upstream.total_reserved_bytes,
+            total_reserved_bytes: upstream.total_capacity_bytes,
         }
     }
 }
@@ -621,37 +621,3 @@ impl<'a> DeviceAllocationContext<'a> {
         Ok(allocation_info)
     }
 }
-
-impl From<gpu_allocator::AllocationError> for crate::DeviceError {
-    fn from(result: gpu_allocator::AllocationError) -> Self {
-        match result {
-            gpu_allocator::AllocationError::OutOfMemory => Self::OutOfMemory,
-            gpu_allocator::AllocationError::FailedToMap(e) => {
-                log::error!("DX12 gpu-allocator: Failed to map: {e}");
-                Self::Lost
-            }
-            gpu_allocator::AllocationError::NoCompatibleMemoryTypeFound => {
-                log::error!("DX12 gpu-allocator: No Compatible Memory Type Found");
-                Self::Lost
-            }
-            gpu_allocator::AllocationError::InvalidAllocationCreateDesc => {
-                log::error!("DX12 gpu-allocator: Invalid Allocation Creation Description");
-                Self::Lost
-            }
-            gpu_allocator::AllocationError::InvalidAllocatorCreateDesc(e) => {
-                log::error!("DX12 gpu-allocator: Invalid Allocator Creation Description: {e}");
-                Self::Lost
-            }
-
-            gpu_allocator::AllocationError::Internal(e) => {
-                log::error!("DX12 gpu-allocator: Internal Error: {e}");
-                Self::Lost
-            }
-            gpu_allocator::AllocationError::BarrierLayoutNeedsDevice10
-            | gpu_allocator::AllocationError::CastableFormatsRequiresEnhancedBarriers
-            | gpu_allocator::AllocationError::CastableFormatsRequiresAtLeastDevice12 => {
-                unreachable!()
-            }
-        }
-    }
-}
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 698858fd31..d62ea34548 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -381,6 +381,41 @@ pub enum DeviceError {
     Unexpected,
 }
 
+#[cfg(any(dx12, vulkan))]
+impl From<gpu_allocator::AllocationError> for DeviceError {
+    fn from(result: gpu_allocator::AllocationError) -> Self {
+        match result {
+            gpu_allocator::AllocationError::OutOfMemory => Self::OutOfMemory,
+            gpu_allocator::AllocationError::FailedToMap(e) => {
+                log::error!("DX12 gpu-allocator: Failed to map: {e}");
+                Self::Lost
+            }
+            gpu_allocator::AllocationError::NoCompatibleMemoryTypeFound => {
+                log::error!("DX12 gpu-allocator: No Compatible Memory Type Found");
+                Self::Lost
+            }
+            gpu_allocator::AllocationError::InvalidAllocationCreateDesc => {
+                log::error!("DX12 gpu-allocator: Invalid Allocation Creation Description");
+                Self::Lost
+            }
+            gpu_allocator::AllocationError::InvalidAllocatorCreateDesc(e) => {
+                log::error!("DX12 gpu-allocator: Invalid Allocator Creation Description: {e}");
+                Self::Lost
+            }
+
+            gpu_allocator::AllocationError::Internal(e) => {
+                log::error!("DX12 gpu-allocator: Internal Error: {e}");
+                Self::Lost
+            }
+            gpu_allocator::AllocationError::BarrierLayoutNeedsDevice10
+            | gpu_allocator::AllocationError::CastableFormatsRequiresEnhancedBarriers
+ | gpu_allocator::AllocationError::CastableFormatsRequiresAtLeastDevice12 => { + unreachable!() + } + } + } +} + #[allow(dead_code)] // may be unused on some platforms #[cold] fn hal_usage_error(txt: T) -> ! { diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index ba50eed76f..49828430fa 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -2255,87 +2255,42 @@ impl super::Adapter { signal_semaphores: Default::default(), }; - let mem_allocator = { - let limits = self.phd_capabilities.properties.limits; - - // Note: the parameters here are not set in stone nor where they picked with - // strong confidence. - // `final_free_list_chunk` should be bigger than starting_free_list_chunk if - // we want the behavior of starting with smaller block sizes and using larger - // ones only after we observe that the small ones aren't enough, which I think - // is a good "I don't know what the workload is going to be like" approach. - // - // For reference, `VMA`, and `gpu_allocator` both start with 256 MB blocks - // (then VMA doubles the block size each time it needs a new block). - // At some point it would be good to experiment with real workloads - // - // TODO(#5925): The plan is to switch the Vulkan backend from `gpu_alloc` to - // `gpu_allocator` which has a different (simpler) set of configuration options. - // - // TODO: These parameters should take hardware capabilities into account. - let mb = 1024 * 1024; - let perf_cfg = gpu_alloc::Config { - starting_free_list_chunk: 128 * mb, - final_free_list_chunk: 512 * mb, - minimal_buddy_size: 1, - initial_buddy_dedicated_size: 8 * mb, - dedicated_threshold: 32 * mb, - preferred_dedicated_threshold: mb, - transient_dedicated_threshold: 128 * mb, - }; - let mem_usage_cfg = gpu_alloc::Config { - starting_free_list_chunk: 8 * mb, - final_free_list_chunk: 64 * mb, - minimal_buddy_size: 1, - initial_buddy_dedicated_size: 8 * mb, - dedicated_threshold: 8 * mb, - preferred_dedicated_threshold: mb, - transient_dedicated_threshold: 16 * mb, - }; - let config = match memory_hints { - wgt::MemoryHints::Performance => perf_cfg, - wgt::MemoryHints::MemoryUsage => mem_usage_cfg, - wgt::MemoryHints::Manual { - suballocated_device_memory_block_size, - } => gpu_alloc::Config { - starting_free_list_chunk: suballocated_device_memory_block_size.start, - final_free_list_chunk: suballocated_device_memory_block_size.end, - initial_buddy_dedicated_size: suballocated_device_memory_block_size.start, - ..perf_cfg - }, - }; - - let max_memory_allocation_size = - if let Some(maintenance_3) = self.phd_capabilities.maintenance_3 { - maintenance_3.max_memory_allocation_size - } else { - u64::MAX - }; - let properties = gpu_alloc::DeviceProperties { - max_memory_allocation_count: limits.max_memory_allocation_count, - max_memory_allocation_size, - non_coherent_atom_size: limits.non_coherent_atom_size, - memory_types: memory_types - .iter() - .map(|memory_type| gpu_alloc::MemoryType { - props: gpu_alloc::MemoryPropertyFlags::from_bits_truncate( - memory_type.property_flags.as_raw() as u8, - ), - heap: memory_type.heap_index, - }) - .collect(), - memory_heaps: mem_properties - .memory_heaps_as_slice() - .iter() - .map(|&memory_heap| gpu_alloc::MemoryHeap { - size: memory_heap.size, - }) - .collect(), - buffer_device_address: enabled_extensions - .contains(&khr::buffer_device_address::NAME), - }; - gpu_alloc::GpuAllocator::new(config, properties) + // TODO: the allocator's configuration should take hardware capability into + // 
account. + const MB: u64 = 1024 * 1024; + let allocation_sizes = match memory_hints { + wgt::MemoryHints::Performance => gpu_allocator::AllocationSizes::new(128 * MB, 64 * MB) + .with_max_device_memblock_size(256 * MB) + .with_max_host_memblock_size(128 * MB), + wgt::MemoryHints::MemoryUsage => gpu_allocator::AllocationSizes::new(8 * MB, 4 * MB) + .with_max_device_memblock_size(64 * MB) + .with_max_host_memblock_size(32 * MB), + wgt::MemoryHints::Manual { + suballocated_device_memory_block_size, + } => { + // TODO: Would it be useful to expose the host size in memory hints + // instead of always using half of the device size? + let device_size = suballocated_device_memory_block_size; + let host_size = device_size.start / 2..device_size.end / 2; + + gpu_allocator::AllocationSizes::new(device_size.start, host_size.start) + .with_max_device_memblock_size(device_size.end) + .with_max_host_memblock_size(host_size.end) + } }; + + let buffer_device_address = enabled_extensions.contains(&khr::buffer_device_address::NAME); + + let mem_allocator = + gpu_allocator::vulkan::Allocator::new(&gpu_allocator::vulkan::AllocatorCreateDesc { + instance: self.instance.raw.clone(), + device: shared.raw.clone(), + physical_device: self.raw, + debug_settings: Default::default(), + buffer_device_address, + allocation_sizes, + })?; + let desc_allocator = gpu_descriptor::DescriptorAllocator::new( if let Some(di) = self.phd_capabilities.descriptor_indexing { di.max_update_after_bind_descriptors_in_all_pools diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 9069437103..470c511e66 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -242,105 +242,17 @@ impl super::DeviceShared { buffer: &'a super::Buffer, ranges: I, ) -> Option>> { - let block = buffer.block.as_ref()?.lock(); + let allocation = buffer.allocation.as_ref()?.lock(); let mask = self.private_caps.non_coherent_map_mask; Some(ranges.map(move |range| { vk::MappedMemoryRange::default() - .memory(*block.memory()) - .offset((block.offset() + range.start) & !mask) + .memory(allocation.memory()) + .offset((allocation.offset() + range.start) & !mask) .size((range.end - range.start + mask) & !mask) })) } } -impl gpu_alloc::MemoryDevice for super::DeviceShared { - unsafe fn allocate_memory( - &self, - size: u64, - memory_type: u32, - flags: gpu_alloc::AllocationFlags, - ) -> Result { - let mut info = vk::MemoryAllocateInfo::default() - .allocation_size(size) - .memory_type_index(memory_type); - - let mut info_flags; - - if flags.contains(gpu_alloc::AllocationFlags::DEVICE_ADDRESS) { - info_flags = vk::MemoryAllocateFlagsInfo::default() - .flags(vk::MemoryAllocateFlags::DEVICE_ADDRESS); - info = info.push_next(&mut info_flags); - } - - match unsafe { self.raw.allocate_memory(&info, None) } { - Ok(memory) => { - self.memory_allocations_counter.add(1); - Ok(memory) - } - Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { - Err(gpu_alloc::OutOfMemory::OutOfDeviceMemory) - } - Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { - Err(gpu_alloc::OutOfMemory::OutOfHostMemory) - } - // We don't use VK_KHR_external_memory - // VK_ERROR_INVALID_EXTERNAL_HANDLE - // We don't use VK_KHR_buffer_device_address - // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR - Err(err) => handle_unexpected(err), - } - } - - unsafe fn deallocate_memory(&self, memory: vk::DeviceMemory) { - self.memory_allocations_counter.sub(1); - - unsafe { self.raw.free_memory(memory, None) }; - } - - unsafe fn map_memory( - &self, - memory: &mut 
vk::DeviceMemory, - offset: u64, - size: u64, - ) -> Result, gpu_alloc::DeviceMapError> { - match unsafe { - self.raw - .map_memory(*memory, offset, size, vk::MemoryMapFlags::empty()) - } { - Ok(ptr) => Ok(ptr::NonNull::new(ptr.cast::()) - .expect("Pointer to memory mapping must not be null")), - Err(vk::Result::ERROR_OUT_OF_DEVICE_MEMORY) => { - Err(gpu_alloc::DeviceMapError::OutOfDeviceMemory) - } - Err(vk::Result::ERROR_OUT_OF_HOST_MEMORY) => { - Err(gpu_alloc::DeviceMapError::OutOfHostMemory) - } - Err(vk::Result::ERROR_MEMORY_MAP_FAILED) => Err(gpu_alloc::DeviceMapError::MapFailed), - Err(err) => handle_unexpected(err), - } - } - - unsafe fn unmap_memory(&self, memory: &mut vk::DeviceMemory) { - unsafe { self.raw.unmap_memory(*memory) }; - } - - unsafe fn invalidate_memory_ranges( - &self, - _ranges: &[gpu_alloc::MappedMemoryRange<'_, vk::DeviceMemory>], - ) -> Result<(), gpu_alloc::OutOfMemory> { - // should never be called - unimplemented!() - } - - unsafe fn flush_memory_ranges( - &self, - _ranges: &[gpu_alloc::MappedMemoryRange<'_, vk::DeviceMemory>], - ) -> Result<(), gpu_alloc::OutOfMemory> { - // should never be called - unimplemented!() - } -} - impl gpu_descriptor::DescriptorDevice for super::DeviceShared @@ -642,7 +554,7 @@ impl super::Device { raw: vk_image, drop_guard, external_memory: None, - block: None, + allocation: None, format: desc.format, copy_size: desc.copy_extent(), identity, @@ -826,7 +738,7 @@ impl super::Device { raw: image.raw, drop_guard: None, external_memory: Some(memory), - block: None, + allocation: None, format: desc.format, copy_size: image.copy_size, identity, @@ -1105,59 +1017,49 @@ impl crate::Device for super::Device { .create_buffer(&vk_info, None) .map_err(super::map_host_device_oom_and_ioca_err)? }; - let req = unsafe { self.shared.raw.get_buffer_memory_requirements(raw) }; - let mut alloc_usage = if desc - .usage - .intersects(wgt::BufferUses::MAP_READ | wgt::BufferUses::MAP_WRITE) - { - let mut flags = gpu_alloc::UsageFlags::HOST_ACCESS; - //TODO: find a way to use `crate::MemoryFlags::PREFER_COHERENT` - flags.set( - gpu_alloc::UsageFlags::DOWNLOAD, - desc.usage.contains(wgt::BufferUses::MAP_READ), - ); - flags.set( - gpu_alloc::UsageFlags::UPLOAD, - desc.usage.contains(wgt::BufferUses::MAP_WRITE), - ); - flags - } else { - gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS + let requirements = unsafe { self.shared.raw.get_buffer_memory_requirements(raw) }; + + let is_cpu_read = desc.usage.contains(wgt::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(wgt::BufferUses::MAP_WRITE); + + let location = match (is_cpu_read, is_cpu_write) { + (true, true) => gpu_allocator::MemoryLocation::CpuToGpu, + (true, false) => gpu_allocator::MemoryLocation::GpuToCpu, + (false, true) => gpu_allocator::MemoryLocation::CpuToGpu, + (false, false) => gpu_allocator::MemoryLocation::GpuOnly, }; - alloc_usage.set( - gpu_alloc::UsageFlags::TRANSIENT, - desc.memory_flags.contains(crate::MemoryFlags::TRANSIENT), - ); - let needs_host_access = alloc_usage.contains(gpu_alloc::UsageFlags::HOST_ACCESS); + let needs_host_access = is_cpu_read || is_cpu_write; - self.error_if_would_oom_on_resource_allocation(needs_host_access, req.size) + self.error_if_would_oom_on_resource_allocation(needs_host_access, requirements.size) .inspect_err(|_| { unsafe { self.shared.raw.destroy_buffer(raw, None) }; })?; - let alignment_mask = req.alignment - 1; + let name = desc.label.unwrap_or("Unlabeled buffer"); - let block = unsafe { - self.mem_allocator.lock().alloc( - &*self.shared, - 
gpu_alloc::Request { - size: req.size, - align_mask: alignment_mask, - usage: alloc_usage, - memory_types: req.memory_type_bits & self.valid_ash_memory_types, + let allocation = self + .mem_allocator + .lock() + .allocate(&gpu_allocator::vulkan::AllocationCreateDesc { + name, + requirements: vk::MemoryRequirements { + memory_type_bits: requirements.memory_type_bits & self.valid_ash_memory_types, + ..requirements }, - ) - } - .inspect_err(|_| { - unsafe { self.shared.raw.destroy_buffer(raw, None) }; - })?; + location, + linear: true, // Buffers are always linear + allocation_scheme: gpu_allocator::vulkan::AllocationScheme::GpuAllocatorManaged, + }) + .inspect_err(|_| { + unsafe { self.shared.raw.destroy_buffer(raw, None) }; + })?; unsafe { self.shared .raw - .bind_buffer_memory(raw, *block.memory(), block.offset()) + .bind_buffer_memory(raw, allocation.memory(), allocation.offset()) } .map_err(super::map_host_device_oom_and_ioca_err) .inspect_err(|_| { @@ -1168,23 +1070,24 @@ impl crate::Device for super::Device { unsafe { self.shared.set_object_name(raw, label) }; } - self.counters.buffer_memory.add(block.size() as isize); + self.counters.buffer_memory.add(allocation.size() as isize); self.counters.buffers.add(1); Ok(super::Buffer { raw, - block: Some(Mutex::new(super::BufferMemoryBacking::Managed(block))), + allocation: Some(Mutex::new(super::BufferMemoryBacking::Managed(allocation))), }) } unsafe fn destroy_buffer(&self, buffer: super::Buffer) { unsafe { self.shared.raw.destroy_buffer(buffer.raw, None) }; - if let Some(block) = buffer.block { - let block = block.into_inner(); - self.counters.buffer_memory.sub(block.size() as isize); - match block { - super::BufferMemoryBacking::Managed(block) => unsafe { - self.mem_allocator.lock().dealloc(&*self.shared, block) - }, + if let Some(allocation) = buffer.allocation { + let allocation = allocation.into_inner(); + self.counters.buffer_memory.sub(allocation.size() as isize); + match allocation { + super::BufferMemoryBacking::Managed(allocation) => { + // TODO: handle error + let _ = self.mem_allocator.lock().free(allocation); + } super::BufferMemoryBacking::VulkanMemory { memory, .. } => unsafe { self.shared.raw.free_memory(memory, None); }, @@ -1203,15 +1106,22 @@ impl crate::Device for super::Device { buffer: &super::Buffer, range: crate::MemoryRange, ) -> Result { - if let Some(ref block) = buffer.block { - let size = range.end - range.start; - let mut block = block.lock(); - if let super::BufferMemoryBacking::Managed(ref mut block) = *block { - let ptr = unsafe { block.map(&*self.shared, range.start, size as usize)? 
}; - let is_coherent = block - .props() - .contains(gpu_alloc::MemoryPropertyFlags::HOST_COHERENT); - Ok(crate::BufferMapping { ptr, is_coherent }) + if let Some(ref allocation) = buffer.allocation { + let mut allocation = allocation.lock(); + if let super::BufferMemoryBacking::Managed(ref mut allocation) = *allocation { + let is_coherent = allocation + .memory_properties() + .contains(vk::MemoryPropertyFlags::HOST_COHERENT); + Ok(crate::BufferMapping { + ptr: unsafe { + allocation + .mapped_ptr() + .unwrap() + .cast() + .offset(range.start as isize) + }, + is_coherent, + }) } else { crate::hal_usage_error("tried to map externally created buffer") } @@ -1219,17 +1129,9 @@ impl crate::Device for super::Device { crate::hal_usage_error("tried to map external buffer") } } - unsafe fn unmap_buffer(&self, buffer: &super::Buffer) { - if let Some(ref block) = buffer.block { - match &mut *block.lock() { - super::BufferMemoryBacking::Managed(block) => unsafe { block.unmap(&*self.shared) }, - super::BufferMemoryBacking::VulkanMemory { .. } => { - crate::hal_usage_error("tried to unmap externally created buffer") - } - }; - } else { - crate::hal_usage_error("tried to unmap external buffer") - } + + unsafe fn unmap_buffer(&self, _buffer: &super::Buffer) { + // gpu-allocator maps the buffer when allocated and unmap it when free'd } unsafe fn flush_mapped_ranges(&self, buffer: &super::Buffer, ranges: I) @@ -1274,27 +1176,32 @@ impl crate::Device for super::Device { unsafe { self.shared.raw.destroy_image(image.raw, None) }; })?; - let block = unsafe { - self.mem_allocator.lock().alloc( - &*self.shared, - gpu_alloc::Request { - size: image.requirements.size, - align_mask: image.requirements.alignment - 1, - usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS, - memory_types: image.requirements.memory_type_bits & self.valid_ash_memory_types, + let name = desc.label.unwrap_or("Unlabeled texture"); + + let allocation = self + .mem_allocator + .lock() + .allocate(&gpu_allocator::vulkan::AllocationCreateDesc { + name, + requirements: vk::MemoryRequirements { + memory_type_bits: image.requirements.memory_type_bits + & self.valid_ash_memory_types, + ..image.requirements }, - ) - } - .inspect_err(|_| { - unsafe { self.shared.raw.destroy_image(image.raw, None) }; - })?; + location: gpu_allocator::MemoryLocation::GpuOnly, + linear: false, + allocation_scheme: gpu_allocator::vulkan::AllocationScheme::GpuAllocatorManaged, + }) + .inspect_err(|_| { + unsafe { self.shared.raw.destroy_image(image.raw, None) }; + })?; - self.counters.texture_memory.add(block.size() as isize); + self.counters.texture_memory.add(allocation.size() as isize); unsafe { self.shared .raw - .bind_image_memory(image.raw, *block.memory(), block.offset()) + .bind_image_memory(image.raw, allocation.memory(), allocation.offset()) } .map_err(super::map_host_device_oom_err) .inspect_err(|_| { @@ -1313,12 +1220,13 @@ impl crate::Device for super::Device { raw: image.raw, drop_guard: None, external_memory: None, - block: Some(block), + allocation: Some(allocation), format: desc.format, copy_size: image.copy_size, identity, }) } + unsafe fn destroy_texture(&self, texture: super::Texture) { if texture.drop_guard.is_none() { unsafe { self.shared.raw.destroy_image(texture.raw, None) }; @@ -1326,10 +1234,10 @@ impl crate::Device for super::Device { if let Some(memory) = texture.external_memory { unsafe { self.shared.raw.free_memory(memory, None) }; } - if let Some(block) = texture.block { - self.counters.texture_memory.sub(block.size() as isize); + if let 
Some(allocation) = texture.allocation {
+            self.counters.texture_memory.sub(allocation.size() as isize);
 
-            unsafe { self.mem_allocator.lock().dealloc(&*self.shared, block) };
+            let _ = self.mem_allocator.lock().free(allocation);
         }
 
         self.counters.textures.sub(1);
@@ -2693,32 +2601,35 @@ impl crate::Device for super::Device {
                 .raw
                 .create_buffer(&vk_buffer_info, None)
                 .map_err(super::map_host_device_oom_and_ioca_err)?;
-            let req = self.shared.raw.get_buffer_memory_requirements(raw_buffer);
-            self.error_if_would_oom_on_resource_allocation(false, req.size)
+            let requirements = self.shared.raw.get_buffer_memory_requirements(raw_buffer);
+
+            self.error_if_would_oom_on_resource_allocation(false, requirements.size)
                 .inspect_err(|_| {
                     self.shared.raw.destroy_buffer(raw_buffer, None);
                 })?;
-            let block = self
+
+            let name = desc
+                .label
+                .unwrap_or("Unlabeled acceleration structure buffer");
+
+            let allocation = self
                 .mem_allocator
                 .lock()
-                .alloc(
-                    &*self.shared,
-                    gpu_alloc::Request {
-                        size: req.size,
-                        align_mask: req.alignment - 1,
-                        usage: gpu_alloc::UsageFlags::FAST_DEVICE_ACCESS,
-                        memory_types: req.memory_type_bits & self.valid_ash_memory_types,
-                    },
-                )
+                .allocate(&gpu_allocator::vulkan::AllocationCreateDesc {
+                    name,
+                    requirements,
+                    location: gpu_allocator::MemoryLocation::GpuOnly,
+                    linear: true, // Buffers are always linear
+                    allocation_scheme: gpu_allocator::vulkan::AllocationScheme::GpuAllocatorManaged,
+                })
                 .inspect_err(|_| {
                     self.shared.raw.destroy_buffer(raw_buffer, None);
                 })?;
 
             self.shared
                 .raw
-                .bind_buffer_memory(raw_buffer, *block.memory(), block.offset())
+                .bind_buffer_memory(raw_buffer, allocation.memory(), allocation.offset())
                 .map_err(super::map_host_device_oom_and_ioca_err)
                 .inspect_err(|_| {
                     self.shared.raw.destroy_buffer(raw_buffer, None);
@@ -2771,7 +2682,7 @@ impl crate::Device for super::Device {
             Ok(super::AccelerationStructure {
                 raw: raw_acceleration_structure,
                 buffer: raw_buffer,
-                block: Mutex::new(block),
+                allocation,
                 compacted_size_query: pool,
             })
         }
@@ -2795,9 +2706,10 @@ impl crate::Device for super::Device {
             self.shared
                 .raw
                 .destroy_buffer(acceleration_structure.buffer, None);
-            self.mem_allocator
+            let _ = self
+                .mem_allocator
                 .lock()
-                .dealloc(&*self.shared, acceleration_structure.block.into_inner());
+                .free(acceleration_structure.allocation);
             if let Some(query) = acceleration_structure.compacted_size_query {
                 self.shared.raw.destroy_query_pool(query, None)
             }
@@ -2812,6 +2724,39 @@ impl crate::Device for super::Device {
         self.counters.as_ref().clone()
     }
 
+    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        let gpu_allocator::AllocatorReport {
+            allocations,
+            blocks,
+            total_allocated_bytes,
+            total_capacity_bytes,
+        } = self.mem_allocator.lock().generate_report();
+
+        let allocations = allocations
+            .into_iter()
+            .map(|alloc| wgt::AllocationReport {
+                name: alloc.name,
+                offset: alloc.offset,
+                size: alloc.size,
+            })
+            .collect();
+
+        let blocks = blocks
+            .into_iter()
+            .map(|block| wgt::MemoryBlockReport {
+                size: block.size,
+                allocations: block.allocations.clone(),
+            })
+            .collect();
+
+        Some(wgt::AllocatorReport {
+            allocations,
+            blocks,
+            total_allocated_bytes,
+            total_reserved_bytes: total_capacity_bytes,
+        })
+    }
+
     fn tlas_instance_to_bytes(&self, instance: TlasInstance) -> Vec<u8> {
         const MAX_U24: u32 = (1u32 << 24u32) - 1u32;
         let temp = RawTlasInstance {
@@ -2950,24 +2895,6 @@ impl super::DeviceShared {
     }
 }
 
-impl From<gpu_alloc::AllocationError> for crate::DeviceError {
-    fn from(error: gpu_alloc::AllocationError) -> Self {
-        use gpu_alloc::AllocationError as Ae;
-        match error {
-            Ae::OutOfDeviceMemory | Ae::OutOfHostMemory | Ae::TooManyObjects => Self::OutOfMemory,
-            Ae::NoCompatibleMemoryTypes => crate::hal_usage_error(error),
-        }
-    }
-}
-impl From<gpu_alloc::MapError> for crate::DeviceError {
-    fn from(error: gpu_alloc::MapError) -> Self {
-        use gpu_alloc::MapError as Me;
-        match error {
-            Me::OutOfDeviceMemory | Me::OutOfHostMemory | Me::MapFailed => Self::OutOfMemory,
-            Me::NonHostVisible | Me::AlreadyMapped => crate::hal_usage_error(error),
-        }
-    }
-}
 
 impl From<gpu_descriptor::AllocationError> for crate::DeviceError {
     fn from(error: gpu_descriptor::AllocationError) -> Self {
         use gpu_descriptor::AllocationError as Ae;
diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs
index 572ffcd3f0..560c06bd72 100644
--- a/wgpu-hal/src/vulkan/instance.rs
+++ b/wgpu-hal/src/vulkan/instance.rs
@@ -1157,7 +1157,7 @@ impl crate::Surface for super::Surface {
             texture: super::Texture {
                 raw: swapchain.images[index as usize],
                 drop_guard: None,
-                block: None,
+                allocation: None,
                 external_memory: None,
                 format: swapchain.config.format,
                 copy_size: crate::CopyExtent {
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index d807411e67..af52f2f09f 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -757,8 +757,7 @@ impl Drop for DeviceShared {
 }
 
 pub struct Device {
-    shared: Arc<DeviceShared>,
-    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
+    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
     desc_allocator: Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
     valid_ash_memory_types: u32,
@@ -766,11 +765,13 @@ pub struct Device {
     #[cfg(feature = "renderdoc")]
     render_doc: crate::auxil::renderdoc::RenderDoc,
     counters: Arc<wgt::HalCounters>,
+    // Struct members are dropped from first to last. Keep `shared` last so that
+    // resources that depend on it, like `mem_allocator`, are destroyed before it.
+    shared: Arc<DeviceShared>,
 }
 
 impl Drop for Device {
     fn drop(&mut self) {
-        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
         unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
     }
 }
@@ -872,7 +873,7 @@ impl Drop for Queue {
 }
 #[derive(Debug)]
 enum BufferMemoryBacking {
-    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
+    Managed(gpu_allocator::vulkan::Allocation),
     VulkanMemory {
         memory: vk::DeviceMemory,
         offset: u64,
@@ -880,10 +881,10 @@ enum BufferMemoryBacking {
     },
 }
 impl BufferMemoryBacking {
-    fn memory(&self) -> &vk::DeviceMemory {
+    fn memory(&self) -> vk::DeviceMemory {
         match self {
-            Self::Managed(m) => m.memory(),
-            Self::VulkanMemory { memory, .. } => memory,
+            Self::Managed(m) => unsafe { m.memory() },
+            Self::VulkanMemory { memory, .. } => *memory,
         }
     }
     fn offset(&self) -> u64 {
@@ -902,7 +903,7 @@ impl BufferMemoryBacking {
 #[derive(Debug)]
 pub struct Buffer {
     raw: vk::Buffer,
-    block: Option<Mutex<BufferMemoryBacking>>,
+    allocation: Option<Mutex<BufferMemoryBacking>>,
 }
 impl Buffer {
     /// # Safety
@@ -912,7 +913,7 @@ impl Buffer {
     pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
         Self {
             raw: vk_buffer,
-            block: None,
+            allocation: None,
         }
     }
     /// # Safety
@@ -927,7 +928,7 @@ impl Buffer {
     ) -> Self {
         Self {
             raw: vk_buffer,
-            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
+            allocation: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                 memory,
                 offset,
                 size,
@@ -942,7 +943,7 @@ impl crate::DynBuffer for Buffer {}
 pub struct AccelerationStructure {
     raw: vk::AccelerationStructureKHR,
     buffer: vk::Buffer,
-    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
+    allocation: gpu_allocator::vulkan::Allocation,
     compacted_size_query: Option<vk::QueryPool>,
 }
 
@@ -952,7 +953,7 @@ impl crate::DynAccelerationStructure for AccelerationStructure {}
 pub struct Texture {
     raw: vk::Image,
     external_memory: Option<vk::DeviceMemory>,
-    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
+    allocation: Option<gpu_allocator::vulkan::Allocation>,
     format: wgt::TextureFormat,
     copy_size: crate::CopyExtent,
     identity: ResourceIdentity,
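
Note for reviewers: the sketch below condenses the gpu-allocator flow this patch adopts on Vulkan — building the allocator from MemoryHints-derived AllocationSizes, suballocating and binding buffer memory, relying on the persistent mapping, and freeing explicitly. It is illustrative only, not code from this patch: the allocate_for_buffer name, the "example buffer" label, and the instance/device/physical_device/buffer parameters are stand-ins for handles wgpu-hal already owns, and in wgpu-hal the Allocator lives on the Device behind a Mutex rather than being created per buffer.

    use ash::vk;
    use gpu_allocator::{
        vulkan::{Allocation, AllocationCreateDesc, AllocationScheme, Allocator, AllocatorCreateDesc},
        AllocationSizes, MemoryLocation,
    };

    /// Allocates and binds memory for `buffer`, mirroring the flow used in
    /// wgpu-hal/src/vulkan/device.rs after this change (hypothetical helper).
    fn allocate_for_buffer(
        instance: &ash::Instance,
        device: &ash::Device,
        physical_device: vk::PhysicalDevice,
        buffer: vk::Buffer,
    ) -> Result<(Allocator, Allocation), gpu_allocator::AllocationError> {
        const MB: u64 = 1024 * 1024;

        // Same numbers the adapter picks for wgt::MemoryHints::Performance:
        // 128 MiB device / 64 MiB host starting blocks, growing to 256 / 128 MiB.
        let allocation_sizes = AllocationSizes::new(128 * MB, 64 * MB)
            .with_max_device_memblock_size(256 * MB)
            .with_max_host_memblock_size(128 * MB);

        let mut allocator = Allocator::new(&AllocatorCreateDesc {
            instance: instance.clone(),
            device: device.clone(),
            physical_device,
            debug_settings: Default::default(),
            // Only true when VK_KHR_buffer_device_address is enabled, as in adapter.rs.
            buffer_device_address: false,
            allocation_sizes,
        })?;

        let requirements = unsafe { device.get_buffer_memory_requirements(buffer) };
        let allocation = allocator.allocate(&AllocationCreateDesc {
            name: "example buffer",
            requirements,
            // CpuToGpu corresponds to MAP_WRITE buffers; GpuOnly to device-local resources.
            location: MemoryLocation::CpuToGpu,
            linear: true, // buffers are always linear
            allocation_scheme: AllocationScheme::GpuAllocatorManaged,
        })?;

        unsafe {
            device
                .bind_buffer_memory(buffer, allocation.memory(), allocation.offset())
                .expect("bind_buffer_memory failed");
        }

        // Host-visible allocations stay persistently mapped; gpu-allocator unmaps them
        // when freed, which is why unmap_buffer became a no-op in device.rs.
        let _ptr = allocation.mapped_ptr();

        // On resource destruction: let _ = allocator.free(allocation);
        Ok((allocator, allocation))
    }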