diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 558abad45c5..8ca61fef08a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -126,6 +126,8 @@ anv_create_cmd_buffer(struct vk_command_pool *pool, &device->dynamic_state_pool, 16384); anv_state_stream_init(&cmd_buffer->general_state_stream, &device->general_state_pool, 16384); + anv_state_stream_init(&cmd_buffer->push_descriptor_stream, + &device->push_descriptor_pool, 4096); int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8, sizeof(struct anv_bo *)); @@ -175,6 +177,7 @@ anv_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer) anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_state_stream_finish(&cmd_buffer->general_state_stream); + anv_state_stream_finish(&cmd_buffer->push_descriptor_stream); while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) { struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos); @@ -220,6 +223,10 @@ anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer, anv_state_stream_init(&cmd_buffer->general_state_stream, &cmd_buffer->device->general_state_pool, 16384); + anv_state_stream_finish(&cmd_buffer->push_descriptor_stream); + anv_state_stream_init(&cmd_buffer->push_descriptor_stream, + &cmd_buffer->device->push_descriptor_pool, 4096); + while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) { struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos); anv_device_release_bo(cmd_buffer->device, *bo); @@ -950,11 +957,17 @@ anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, if (layout->descriptor_buffer_size && ((*push_set)->set_used_on_gpu || set->desc_mem.alloc_size < layout->descriptor_buffer_size)) { + struct anv_physical_device *pdevice = cmd_buffer->device->physical; + struct anv_state_stream *push_stream = + pdevice->indirect_descriptors ? 
+ &cmd_buffer->push_descriptor_stream : + &cmd_buffer->surface_state_stream; + /* The previous buffer is either actively used by some GPU command (so * we can't modify it) or is too small. Allocate a new one. */ struct anv_state desc_mem = - anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + anv_state_stream_alloc(push_stream, anv_descriptor_set_layout_descriptor_buffer_size(layout, 0), ANV_UBO_ALIGNMENT); if (set->desc_mem.alloc_size) { @@ -964,10 +977,9 @@ anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, } set->desc_mem = desc_mem; - set->desc_addr = (struct anv_address) { - .bo = cmd_buffer->dynamic_state_stream.state_pool->block_pool.bo, - .offset = set->desc_mem.offset, - }; + set->desc_addr = anv_state_pool_state_address( + push_stream->state_pool, + set->desc_mem); } return set; diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 73ba4636093..c8176580164 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1353,6 +1353,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->compiler->supports_shader_constants = true; device->compiler->indirect_ubos_use_sampler = device->info.ver < 12; device->compiler->extended_bindless_surface_offset = device->uses_ex_bso; + device->compiler->use_bindless_sampler_offset = !device->indirect_descriptors; isl_device_init(&device->isl_dev, &device->info); @@ -2849,10 +2850,13 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) return ret_bo; if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address)) return ret_bo; - if (get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address)) + if (device->physical->indirect_descriptors && + get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address)) return ret_bo; if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address)) return ret_bo; + if 
(get_bo_from_pool(&ret_bo, &device->push_descriptor_pool.block_pool, address)) + return ret_bo; if (!device->cmd_buffer_being_decoded) return (struct intel_batch_decode_bo) { }; @@ -3108,6 +3112,10 @@ VkResult anv_CreateDevice( device->physical->va.high_heap.addr, device->physical->va.high_heap.size); + util_vma_heap_init(&device->vma_desc, + device->physical->va.descriptor_pool.addr, + device->physical->va.descriptor_pool.size); + list_inithead(&device->memory_objects); list_inithead(&device->image_private_objects); @@ -3198,12 +3206,14 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_scratch_surface_state_pool; - result = anv_state_pool_init(&device->bindless_surface_state_pool, device, - "bindless surface state pool", - device->physical->va.bindless_surface_state_pool.addr, - 0, 4096); - if (result != VK_SUCCESS) - goto fail_internal_surface_state_pool; + if (device->physical->indirect_descriptors) { + result = anv_state_pool_init(&device->bindless_surface_state_pool, device, + "bindless surface state pool", + device->physical->va.bindless_surface_state_pool.addr, + 0, 4096); + if (result != VK_SUCCESS) + goto fail_internal_surface_state_pool; + } if (device->info->verx10 >= 125) { /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding @@ -3232,11 +3242,18 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_bindless_surface_state_pool; + result = anv_state_pool_init(&device->push_descriptor_pool, device, + "push descriptor pool", + device->physical->va.push_descriptor_pool.addr, + 0, 4096); + if (result != VK_SUCCESS) + goto fail_binding_table_pool; + if (device->info->has_aux_map) { device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator, &physical_device->info); if (!device->aux_map_ctx) - goto fail_binding_table_pool; + goto fail_push_descriptor_pool; } result = anv_device_alloc_bo(device, "workaround", 8192, @@ -3413,10 +3430,13 @@ VkResult anv_CreateDevice( intel_aux_map_finish(device->aux_map_ctx); 
device->aux_map_ctx = NULL; } + fail_push_descriptor_pool: + anv_state_pool_finish(&device->push_descriptor_pool); fail_binding_table_pool: anv_state_pool_finish(&device->binding_table_pool); fail_bindless_surface_state_pool: - anv_state_pool_finish(&device->bindless_surface_state_pool); + if (device->physical->indirect_descriptors) + anv_state_pool_finish(&device->bindless_surface_state_pool); fail_internal_surface_state_pool: anv_state_pool_finish(&device->internal_surface_state_pool); fail_scratch_surface_state_pool: @@ -3437,6 +3457,7 @@ VkResult anv_CreateDevice( fail_mutex: pthread_mutex_destroy(&device->mutex); fail_vmas: + util_vma_heap_finish(&device->vma_desc); util_vma_heap_finish(&device->vma_hi); util_vma_heap_finish(&device->vma_cva); util_vma_heap_finish(&device->vma_lo); @@ -3513,11 +3534,13 @@ void anv_DestroyDevice( device->aux_map_ctx = NULL; } + anv_state_pool_finish(&device->push_descriptor_pool); anv_state_pool_finish(&device->binding_table_pool); if (device->info->verx10 >= 125) anv_state_pool_finish(&device->scratch_surface_state_pool); anv_state_pool_finish(&device->internal_surface_state_pool); - anv_state_pool_finish(&device->bindless_surface_state_pool); + if (device->physical->indirect_descriptors) + anv_state_pool_finish(&device->bindless_surface_state_pool); anv_state_pool_finish(&device->instruction_state_pool); anv_state_pool_finish(&device->dynamic_state_pool); anv_state_pool_finish(&device->general_state_pool); @@ -3526,6 +3549,7 @@ void anv_DestroyDevice( anv_bo_cache_finish(&device->bo_cache); + util_vma_heap_finish(&device->vma_desc); util_vma_heap_finish(&device->vma_hi); util_vma_heap_finish(&device->vma_cva); util_vma_heap_finish(&device->vma_lo); @@ -3588,6 +3612,9 @@ anv_vma_heap_for_flags(struct anv_device *device, if (alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS) return &device->vma_lo; + if (alloc_flags & ANV_BO_ALLOC_DESCRIPTOR_POOL) + return &device->vma_desc; + return &device->vma_hi; } @@ -3634,7 +3661,8 @@ 
anv_vma_free(struct anv_device *device, { assert(vma_heap == &device->vma_lo || vma_heap == &device->vma_cva || - vma_heap == &device->vma_hi); + vma_heap == &device->vma_hi || + vma_heap == &device->vma_desc); const uint64_t addr_48b = intel_48b_address(address); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index e052a70b631..9f4671ba4e8 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -384,6 +384,9 @@ enum anv_bo_alloc_flags { /** This buffer will be scanout to display */ ANV_BO_ALLOC_SCANOUT = (1 << 12), + + /** For descriptor pools */ + ANV_BO_ALLOC_DESCRIPTOR_POOL = (1 << 13), }; struct anv_bo { @@ -956,6 +959,8 @@ struct anv_physical_device { struct anv_va_range scratch_surface_state_pool; struct anv_va_range bindless_surface_state_pool; struct anv_va_range instruction_state_pool; + struct anv_va_range descriptor_pool; + struct anv_va_range push_descriptor_pool; struct anv_va_range client_visible_heap; struct anv_va_range high_heap; } va; @@ -1123,6 +1128,7 @@ struct anv_device { struct util_vma_heap vma_lo; struct util_vma_heap vma_cva; struct util_vma_heap vma_hi; + struct util_vma_heap vma_desc; /** List of all anv_device_memory objects */ struct list_head memory_objects; @@ -1142,6 +1148,7 @@ struct anv_device { struct anv_state_pool scratch_surface_state_pool; struct anv_state_pool internal_surface_state_pool; struct anv_state_pool bindless_surface_state_pool; + struct anv_state_pool push_descriptor_pool; struct anv_state_reserved_pool custom_border_colors; @@ -2827,6 +2834,7 @@ struct anv_cmd_buffer { struct anv_state_stream surface_state_stream; struct anv_state_stream dynamic_state_stream; struct anv_state_stream general_state_stream; + struct anv_state_stream push_descriptor_stream; VkCommandBufferUsageFlags usage_flags; diff --git a/src/intel/vulkan/anv_va.c b/src/intel/vulkan/anv_va.c index f7cc7039f66..b5b8f23b9c3 100644 --- a/src/intel/vulkan/anv_va.c +++ 
b/src/intel/vulkan/anv_va.c @@ -55,7 +55,10 @@ anv_device_print_vas(struct anv_physical_device *device) PRINT_HEAP(dynamic_state_pool); PRINT_HEAP(binding_table_pool); PRINT_HEAP(internal_surface_state_pool); + PRINT_HEAP(scratch_surface_state_pool); PRINT_HEAP(bindless_surface_state_pool); + PRINT_HEAP(descriptor_pool); + PRINT_HEAP(push_descriptor_pool); PRINT_HEAP(instruction_state_pool); PRINT_HEAP(client_visible_heap); PRINT_HEAP(high_heap); @@ -106,13 +109,34 @@ anv_physical_device_init_va_ranges(struct anv_physical_device *device) * binding tables can address internal surface states & bindless surface * states. */ + address = align64(address, _4Gb); address = va_add(&device->va.binding_table_pool, address, _1Gb); - address = va_add(&device->va.internal_surface_state_pool, address, 2 * _1Gb); + address = va_add(&device->va.internal_surface_state_pool, address, 1 * _1Gb); /* Scratch surface state overlaps with the internal surface state */ va_at(&device->va.scratch_surface_state_pool, device->va.internal_surface_state_pool.addr, 8 * _1Mb); - address = va_add(&device->va.bindless_surface_state_pool, address, _1Gb); + + /* Both of the following heaps have to be in the same 4Gb range from the + * binding table pool start so they can be addressed from binding table + * entries. + */ + if (device->indirect_descriptors) { + /* With indirect descriptors, we allocate bindless surface states from + * this pool. + */ + address = va_add(&device->va.bindless_surface_state_pool, address, 2 * _1Gb); + + /* Descriptor buffers can go anywhere */ + address = align64(address, _4Gb); + address = va_add(&device->va.descriptor_pool, address, 3 * _1Gb); + address = va_add(&device->va.push_descriptor_pool, address, _1Gb); + } else { + /* With direct descriptors, descriptor set buffers are allocated + * here. 
+ */ + address = va_add(&device->va.descriptor_pool, address, 2 * _1Gb); + } /* We use a trick to compute constant data offsets in the shaders to avoid * unnecessary 64bit address computations (see lower_load_constant() in diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index f550053ae14..bf660f5b1bb 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -183,20 +183,47 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.IndirectObjectBufferSizeModifyEnable = true; sba.DynamicStateBufferSizeModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; - sba.BindlessSurfaceStateBaseAddress = - (struct anv_address) { .offset = - device->physical->va.bindless_surface_state_pool.addr, - }; - sba.BindlessSurfaceStateSize = - anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1; - sba.BindlessSurfaceStateMOCS = mocs; - sba.BindlessSurfaceStateBaseAddressModifyEnable = true; -#if GFX_VER >= 11 - sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.BindlessSamplerStateMOCS = mocs; - sba.BindlessSamplerStateBaseAddressModifyEnable = true; - sba.BindlessSamplerStateBufferSize = 0; + + if (!device->physical->indirect_descriptors) { +#if GFX_VERx10 >= 125 + /* Bindless Surface State & Bindless Sampler State are aligned to the + * same heap + */ + sba.BindlessSurfaceStateBaseAddress = + sba.BindlessSamplerStateBaseAddress = + (struct anv_address) { .offset = + device->physical->va.binding_table_pool.addr, }; + sba.BindlessSurfaceStateSize = + (device->physical->va.binding_table_pool.size + + device->physical->va.internal_surface_state_pool.size + + device->physical->va.descriptor_pool.size) - 1; + sba.BindlessSamplerStateBufferSize = + (device->physical->va.binding_table_pool.size + + device->physical->va.internal_surface_state_pool.size + + device->physical->va.descriptor_pool.size) / 4096 - 1; + 
sba.BindlessSurfaceStateMOCS = sba.BindlessSamplerStateMOCS = mocs; + sba.BindlessSurfaceStateBaseAddressModifyEnable = + sba.BindlessSamplerStateBaseAddressModifyEnable = true; +#else + unreachable("Direct descriptor not supported"); #endif + } else { + sba.BindlessSurfaceStateBaseAddress = + (struct anv_address) { .offset = + device->physical->va.bindless_surface_state_pool.addr, + }; + sba.BindlessSurfaceStateSize = + anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1; + sba.BindlessSurfaceStateMOCS = mocs; + sba.BindlessSurfaceStateBaseAddressModifyEnable = true; +#if GFX_VER >= 11 + sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; + sba.BindlessSamplerStateMOCS = mocs; + sba.BindlessSamplerStateBaseAddressModifyEnable = true; + sba.BindlessSamplerStateBufferSize = 0; +#endif + } + #if GFX_VERx10 >= 125 sba.L1CacheControl = L1CC_WB; #endif diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 582553b0f05..8c2f5f87b63 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -261,18 +261,39 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) sba.InstructionBaseAddressModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; - sba.BindlessSurfaceStateBaseAddress = - (struct anv_address) { .offset = - device->physical->va.bindless_surface_state_pool.addr, }; - sba.BindlessSurfaceStateSize = - anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1; - sba.BindlessSurfaceStateMOCS = mocs; - sba.BindlessSurfaceStateBaseAddressModifyEnable = true; + if (device->physical->indirect_descriptors) { + sba.BindlessSurfaceStateBaseAddress = + (struct anv_address) { .offset = + device->physical->va.bindless_surface_state_pool.addr, + }; + sba.BindlessSurfaceStateSize = + anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1; + sba.BindlessSurfaceStateMOCS = mocs; + 
sba.BindlessSurfaceStateBaseAddressModifyEnable = true; - sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.BindlessSamplerStateMOCS = mocs; - sba.BindlessSamplerStateBaseAddressModifyEnable = true; - sba.BindlessSamplerStateBufferSize = 0; + sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; + sba.BindlessSamplerStateMOCS = mocs; + sba.BindlessSamplerStateBaseAddressModifyEnable = true; + sba.BindlessSamplerStateBufferSize = 0; + } else { + /* Bindless Surface State & Bindless Sampler State are aligned to the + * same heap + */ + sba.BindlessSurfaceStateBaseAddress = + sba.BindlessSamplerStateBaseAddress = + (struct anv_address) { .offset = device->physical->va.binding_table_pool.addr, }; + sba.BindlessSurfaceStateSize = + (device->physical->va.binding_table_pool.size + + device->physical->va.internal_surface_state_pool.size + + device->physical->va.descriptor_pool.size) - 1; + sba.BindlessSamplerStateBufferSize = + (device->physical->va.binding_table_pool.size + + device->physical->va.internal_surface_state_pool.size + + device->physical->va.descriptor_pool.size) / 4096 - 1; + sba.BindlessSurfaceStateMOCS = sba.BindlessSamplerStateMOCS = mocs; + sba.BindlessSurfaceStateBaseAddressModifyEnable = + sba.BindlessSamplerStateBaseAddressModifyEnable = true; + } #if GFX_VERx10 >= 125 sba.L1CacheControl = L1CC_WB; diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index ff6e7d1ae94..b3dc1b2edf2 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -344,9 +344,17 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, if (result != VK_SUCCESS) return result; - result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool); - if (result != VK_SUCCESS) - return result; + if (device->physical->va.bindless_surface_state_pool.size > 0) { + result = pin_state_pool(device, execbuf, 
&device->bindless_surface_state_pool); + if (result != VK_SUCCESS) + return result; + } + + if (device->physical->va.push_descriptor_pool.size > 0) { + result = pin_state_pool(device, execbuf, &device->push_descriptor_pool); + if (result != VK_SUCCESS) + return result; + } result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool); if (result != VK_SUCCESS)