#[cfg(feature = "std")] use alloc::sync::Arc; use alloc::{boxed::Box, string::ToString, vec::Vec}; #[cfg(feature = "std")] use std::backtrace::Backtrace; use log::debug; use objc2::{rc::Retained, runtime::ProtocolObject}; use objc2_foundation::{ns_string, NSString}; #[cfg(doc)] use objc2_metal::{MTLAllocation, MTLResource}; use objc2_metal::{ MTLCPUCacheMode, MTLDevice, MTLHeap, MTLHeapDescriptor, MTLHeapType, MTLResidencySet, MTLResourceOptions, MTLStorageMode, MTLTextureDescriptor, }; #[cfg(feature = "visualizer")] mod visualizer; #[cfg(feature = "visualizer")] pub use visualizer::AllocatorVisualizer; use crate::{ allocator::{ AllocationType, AllocatorReport, DedicatedBlockAllocator, FreeListAllocator, MemoryBlockReport, SubAllocator, }, AllocationError, AllocationSizes, AllocatorDebugSettings, MemoryLocation, Result, }; fn memory_location_to_metal(location: MemoryLocation) -> MTLResourceOptions { match location { MemoryLocation::GpuOnly => MTLResourceOptions::StorageModePrivate, MemoryLocation::CpuToGpu | MemoryLocation::GpuToCpu | MemoryLocation::Unknown => { MTLResourceOptions::StorageModeShared } } } #[derive(Debug)] pub struct Allocation { chunk_id: Option, offset: u64, size: u64, memory_block_index: usize, memory_type_index: usize, heap: Retained>, name: Option>, } impl Allocation { /// Returns the [`MTLHeap`] object that is backing this allocation. /// /// This heap object can be shared with multiple other allocations and shouldn't be allocated from /// without this library, because that will lead to undefined behavior. /// /// # Safety /// When allocating new buffers, textures, or other resources on this [`MTLHeap`], be sure to /// pass [`Self::offset()`] and not exceed [`Self::size()`] to not allocate new resources on top /// of existing [`Allocation`]s. /// /// Also, this [`Allocation`] must not be [`Allocator::free()`]d while such a created resource /// on this [`MTLHeap`] is still live. pub unsafe fn heap(&self) -> &ProtocolObject { &self.heap } /// Returns the size of the allocation pub fn size(&self) -> u64 { self.size } /// Returns the offset of the allocation on the [`MTLHeap`]. /// /// Since all [`Allocation`]s are suballocated within a [`MTLHeap`], this offset always needs to /// be supplied. See the safety documentation on [`Self::heap()`]. pub fn offset(&self) -> u64 { self.offset } pub fn name(&self) -> Option<&str> { self.name.as_deref() } fn is_null(&self) -> bool { self.chunk_id.is_none() } } #[derive(Clone, Debug)] pub struct AllocationCreateDesc<'a> { /// Name of the allocation, for tracking and debugging purposes pub name: &'a str, /// Location where the memory allocation should be stored pub location: MemoryLocation, pub size: u64, pub alignment: u64, } impl<'a> AllocationCreateDesc<'a> { pub fn buffer( device: &ProtocolObject, name: &'a str, length: u64, location: MemoryLocation, ) -> Self { let size_and_align = device.heapBufferSizeAndAlignWithLength_options( length as usize, memory_location_to_metal(location), ); Self { name, location, size: size_and_align.size as u64, alignment: size_and_align.align as u64, } } pub fn texture( device: &ProtocolObject, name: &'a str, desc: &MTLTextureDescriptor, ) -> Self { let size_and_align = device.heapTextureSizeAndAlignWithDescriptor(desc); Self { name, location: match desc.storageMode() { MTLStorageMode::Shared | MTLStorageMode::Managed | MTLStorageMode::Memoryless => { MemoryLocation::Unknown } MTLStorageMode::Private => MemoryLocation::GpuOnly, MTLStorageMode(mode /* @ 4.. */) => todo!("Unknown storage mode {mode}"), }, size: size_and_align.size as u64, alignment: size_and_align.align as u64, } } pub fn acceleration_structure_with_size( device: &ProtocolObject, name: &'a str, size: u64, // TODO: usize location: MemoryLocation, ) -> Self { // TODO: See if we can mark this function as safe, after checking what happens if size is too large? // What other preconditions need to be upheld? let size_and_align = unsafe { device.heapAccelerationStructureSizeAndAlignWithSize(size as usize) }; Self { name, location, size: size_and_align.size as u64, alignment: size_and_align.align as u64, } } } pub struct Allocator { device: Retained>, global_residency_set: Option>>, debug_settings: AllocatorDebugSettings, memory_types: Vec, allocation_sizes: AllocationSizes, } impl core::fmt::Debug for Allocator { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.generate_report().fmt(f) } } #[derive(Debug)] pub struct AllocatorCreateDesc { pub device: Retained>, pub debug_settings: AllocatorDebugSettings, pub allocation_sizes: AllocationSizes, /// Whether to create a [`MTLResidencySet`] containing all live heaps, that can be retrieved via /// [`Allocator::residency_set()`]. Only supported on `MacOS 15.0+` / `iOS 18.0+`. pub create_residency_set: bool, } #[derive(Debug)] pub struct CommittedAllocationStatistics { pub num_allocations: usize, pub total_size: u64, } #[derive(Debug)] struct MemoryBlock { heap: Retained>, size: u64, sub_allocator: Box, } impl MemoryBlock { fn new( device: &ProtocolObject, size: u64, heap_descriptor: &MTLHeapDescriptor, dedicated: bool, memory_location: MemoryLocation, ) -> Result { heap_descriptor.setSize(size as usize); let heap = device .newHeapWithDescriptor(heap_descriptor) .ok_or_else(|| AllocationError::Internal("No MTLHeap was returned".to_string()))?; heap.setLabel(Some(&NSString::from_str(&format!( "MemoryBlock {memory_location:?}" )))); let sub_allocator: Box = if dedicated { Box::new(DedicatedBlockAllocator::new(size)) } else { Box::new(FreeListAllocator::new(size)) }; Ok(Self { heap, size, sub_allocator, }) } } #[derive(Debug)] struct MemoryType { global_residency_set: Option>>, memory_blocks: Vec>, _committed_allocations: CommittedAllocationStatistics, memory_location: MemoryLocation, heap_properties: Retained, memory_type_index: usize, active_general_blocks: usize, } impl MemoryType { fn allocate( &mut self, device: &ProtocolObject, desc: &AllocationCreateDesc<'_>, #[cfg(feature = "std")] backtrace: Arc, allocation_sizes: &AllocationSizes, ) -> Result { let allocation_type = AllocationType::Linear; let is_host = self.heap_properties.storageMode() != MTLStorageMode::Private; let memblock_size = allocation_sizes.get_memblock_size(is_host, self.active_general_blocks); let size = desc.size; let alignment = desc.alignment; // Create a dedicated block for large memory allocations if size > memblock_size { let mem_block = MemoryBlock::new( device, size, &self.heap_properties, true, self.memory_location, )?; if let Some(rs) = &self.global_residency_set { unsafe { rs.addAllocation(mem_block.heap.as_ref()) } } let block_index = self.memory_blocks.iter().position(|block| block.is_none()); let block_index = match block_index { Some(i) => { self.memory_blocks[i].replace(mem_block); i } None => { self.memory_blocks.push(Some(mem_block)); self.memory_blocks.len() - 1 } }; let mem_block = self.memory_blocks[block_index] .as_mut() .ok_or_else(|| AllocationError::Internal("Memory block must be Some".into()))?; let (offset, chunk_id) = mem_block.sub_allocator.allocate( size, alignment, allocation_type, 1, desc.name, #[cfg(feature = "std")] backtrace, )?; return Ok(Allocation { chunk_id: Some(chunk_id), size, offset, memory_block_index: block_index, memory_type_index: self.memory_type_index, heap: mem_block.heap.clone(), name: Some(desc.name.into()), }); } let mut empty_block_index = None; for (mem_block_i, mem_block) in self.memory_blocks.iter_mut().enumerate().rev() { if let Some(mem_block) = mem_block { let allocation = mem_block.sub_allocator.allocate( size, alignment, allocation_type, 1, desc.name, #[cfg(feature = "std")] backtrace.clone(), ); match allocation { Ok((offset, chunk_id)) => { return Ok(Allocation { chunk_id: Some(chunk_id), offset, size, memory_block_index: mem_block_i, memory_type_index: self.memory_type_index, heap: mem_block.heap.clone(), name: Some(desc.name.into()), }); } Err(AllocationError::OutOfMemory) => {} // Block is full, continue search. Err(err) => return Err(err), // Unhandled error, return. } } else if empty_block_index.is_none() { empty_block_index = Some(mem_block_i); } } let mem_block = MemoryBlock::new( device, memblock_size, &self.heap_properties, false, self.memory_location, )?; if let Some(rs) = &self.global_residency_set { unsafe { rs.addAllocation(mem_block.heap.as_ref()) } } let new_block_index = if let Some(block_index) = empty_block_index { self.memory_blocks[block_index] = Some(mem_block); block_index } else { self.memory_blocks.push(Some(mem_block)); self.memory_blocks.len() - 1 }; self.active_general_blocks += 1; let mem_block = self.memory_blocks[new_block_index] .as_mut() .ok_or_else(|| AllocationError::Internal("Memory block must be Some".into()))?; let allocation = mem_block.sub_allocator.allocate( size, alignment, allocation_type, 1, desc.name, #[cfg(feature = "std")] backtrace, ); let (offset, chunk_id) = match allocation { Err(AllocationError::OutOfMemory) => Err(AllocationError::Internal( "Allocation that must succeed failed. This is a bug in the allocator.".into(), )), a => a, }?; Ok(Allocation { chunk_id: Some(chunk_id), offset, size, memory_block_index: new_block_index, memory_type_index: self.memory_type_index, heap: mem_block.heap.clone(), name: Some(desc.name.into()), }) } fn free(&mut self, allocation: &Allocation) -> Result<()> { let block_idx = allocation.memory_block_index; let mem_block = self.memory_blocks[block_idx] .as_mut() .ok_or_else(|| AllocationError::Internal("Memory block must be Some.".into()))?; mem_block.sub_allocator.free(allocation.chunk_id)?; // We only want to destroy this now-empty block if it is either a dedicated/personal // allocation, or a block supporting sub-allocations that is not the last one (ensuring // there's always at least one block/allocator readily available). let is_dedicated_or_not_last_general_block = !mem_block.sub_allocator.supports_general_allocations() || self.active_general_blocks > 1; if mem_block.sub_allocator.is_empty() && is_dedicated_or_not_last_general_block { let block = self.memory_blocks[block_idx] .take() .ok_or_else(|| AllocationError::Internal("Memory block must be Some.".into()))?; if block.sub_allocator.supports_general_allocations() { self.active_general_blocks -= 1; } if let Some(rs) = &self.global_residency_set { unsafe { rs.removeAllocation(block.heap.as_ref()) } } // Note that `block` will be destroyed on `drop` here } Ok(()) } } impl Allocator { pub fn new(desc: &AllocatorCreateDesc) -> Result { let heap_types = [ (MemoryLocation::GpuOnly, { let heap_desc = unsafe { MTLHeapDescriptor::new() }; heap_desc.setCpuCacheMode(MTLCPUCacheMode::DefaultCache); heap_desc.setStorageMode(MTLStorageMode::Private); heap_desc.setType(MTLHeapType::Placement); heap_desc }), (MemoryLocation::CpuToGpu, { let heap_desc = unsafe { MTLHeapDescriptor::new() }; heap_desc.setCpuCacheMode(MTLCPUCacheMode::WriteCombined); heap_desc.setStorageMode(MTLStorageMode::Shared); heap_desc.setType(MTLHeapType::Placement); heap_desc }), (MemoryLocation::GpuToCpu, { let heap_desc = unsafe { MTLHeapDescriptor::new() }; heap_desc.setCpuCacheMode(MTLCPUCacheMode::DefaultCache); heap_desc.setStorageMode(MTLStorageMode::Shared); heap_desc.setType(MTLHeapType::Placement); heap_desc }), ]; let global_residency_set = if desc.create_residency_set { Some(unsafe { let rs_desc = objc2_metal::MTLResidencySetDescriptor::new(); rs_desc.setLabel(Some(ns_string!("gpu-allocator global residency set"))); desc.device .newResidencySetWithDescriptor_error(&rs_desc) .expect("Failed to create MTLResidencySet. Unsupported MacOS/iOS version?") }) } else { None }; let memory_types = heap_types .into_iter() .enumerate() .map(|(i, (memory_location, heap_descriptor))| MemoryType { global_residency_set: global_residency_set.clone(), memory_blocks: vec![], _committed_allocations: CommittedAllocationStatistics { num_allocations: 0, total_size: 0, }, memory_location, heap_properties: heap_descriptor, memory_type_index: i, active_general_blocks: 0, }) .collect(); Ok(Self { device: desc.device.clone(), debug_settings: desc.debug_settings, memory_types, allocation_sizes: desc.allocation_sizes, global_residency_set, }) } pub fn allocate(&mut self, desc: &AllocationCreateDesc<'_>) -> Result { let size = desc.size; let alignment = desc.alignment; #[cfg(feature = "std")] let backtrace = Arc::new(if self.debug_settings.store_stack_traces { Backtrace::force_capture() } else { Backtrace::disabled() }); if self.debug_settings.log_allocations { debug!( "Allocating `{}` of {} bytes with an alignment of {}.", &desc.name, size, alignment ); #[cfg(feature = "std")] if self.debug_settings.log_stack_traces { let backtrace = Backtrace::force_capture(); debug!("Allocation stack trace: {backtrace}"); } } if size == 0 || !alignment.is_power_of_two() { return Err(AllocationError::InvalidAllocationCreateDesc); } // Find memory type let memory_type = self .memory_types .iter_mut() .find(|memory_type| { // Is location compatible desc.location == MemoryLocation::Unknown || desc.location == memory_type.memory_location }) .ok_or(AllocationError::NoCompatibleMemoryTypeFound)?; memory_type.allocate( &self.device, desc, #[cfg(feature = "std")] backtrace, &self.allocation_sizes, ) } pub fn free(&mut self, allocation: &Allocation) -> Result<()> { if self.debug_settings.log_frees { let name = allocation.name.as_deref().unwrap_or(""); debug!("Freeing `{name}`."); #[cfg(feature = "std")] if self.debug_settings.log_stack_traces { let backtrace = Backtrace::force_capture(); debug!("Free stack trace: {backtrace}"); } } if allocation.is_null() { return Ok(()); } self.memory_types[allocation.memory_type_index].free(allocation)?; Ok(()) } /// Returns heaps for all memory blocks pub fn heaps(&self) -> impl Iterator> { self.memory_types.iter().flat_map(|memory_type| { memory_type .memory_blocks .iter() .flatten() .map(|block| block.heap.as_ref()) }) } pub fn generate_report(&self) -> AllocatorReport { let mut allocations = vec![]; let mut blocks = vec![]; let mut total_capacity_bytes = 0; for memory_type in &self.memory_types { for block in memory_type.memory_blocks.iter().flatten() { total_capacity_bytes += block.size; let first_allocation = allocations.len(); allocations.extend(block.sub_allocator.report_allocations()); blocks.push(MemoryBlockReport { size: block.size, allocations: first_allocation..allocations.len(), }); } } let total_allocated_bytes = allocations.iter().map(|report| report.size).sum(); AllocatorReport { allocations, blocks, total_allocated_bytes, total_capacity_bytes, } } /// Current total capacity of memory blocks allocated on the device, in bytes pub fn capacity(&self) -> u64 { let mut total_capacity_bytes = 0; for memory_type in &self.memory_types { for block in memory_type.memory_blocks.iter().flatten() { total_capacity_bytes += block.size; } } total_capacity_bytes } /// Optional residency set containing all heap allocations created/owned by this allocator to /// be made resident at once when its allocations are used on the GPU. The caller _must_ invoke /// [`MTLResidencySet::commit()`] whenever these resources are used to make sure the latest /// changes are visible to Metal, e.g. before committing a command buffer. /// /// This residency set can be attached to individual command buffers or to a queue directly /// since usage of allocated resources is expected to be global. /// /// Alternatively callers can build up their own residency set(s) based on individual /// [`MTLAllocation`]s [^heap-allocation] rather than making all heaps allocated via /// `gpu-allocator` resident at once. /// /// [^heap-allocation]: Note that [`MTLHeap`]s returned by [`Allocator::heaps()`] are also /// allocations. If individual placed [`MTLResource`]s on a heap are made resident, the entire /// heap will be made resident. /// /// Callers still need to be careful to make resources created outside of `gpu-allocator` /// resident on the GPU, such as indirect command buffers. /// /// This residency set is only available when requested via /// [`AllocatorCreateDesc::create_residency_set`], otherwise this function returns [`None`]. pub fn residency_set(&self) -> Option<&Retained>> { // Return the retained object so that the caller also has a way to store it, since we will // keep using and updating the same object going forward. self.global_residency_set.as_ref() } }