diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 249bbe8c7ef..7a5504d8d10 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -149,10 +149,10 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
    marker.cb_id = 0;
    marker.device_id_low = device_id;
    marker.device_id_high = device_id >> 32;
-   marker.queue = cmd_buffer->queue_family_index;
+   marker.queue = cmd_buffer->qf;
    marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT;

-   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
+   if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
       marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

    radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 0585be30418..c1795b5c662 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -61,8 +61,8 @@ enum {
 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
                                          struct radv_image *image, VkImageLayout src_layout,
                                          bool src_render_loop, VkImageLayout dst_layout,
-                                         bool dst_render_loop, uint32_t src_family,
-                                         uint32_t dst_family, const VkImageSubresourceRange *range,
+                                         bool dst_render_loop, uint32_t src_family_index,
+                                         uint32_t dst_family_index, const VkImageSubresourceRange *range,
                                          struct radv_sample_locations_state *sample_locs);

 static void radv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size);
@@ -356,12 +356,13 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy
 bool
 radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
 {
-   return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
+   return cmd_buffer->qf == RADV_QUEUE_COMPUTE &&
           cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
 }

 enum ring_type
-radv_queue_family_to_ring(int f)
+radv_queue_family_to_ring(struct radv_physical_device *physical_device,
+                          enum radv_queue_family f)
 {
    switch (f) {
    case RADV_QUEUE_GENERAL:
@@ -459,9 +460,9 @@ radv_create_cmd_buffer(struct radv_device *device, struct radv_cmd_pool *pool,
    cmd_buffer->pool = pool;
    list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);

-   cmd_buffer->queue_family_index = pool->vk.queue_family_index;
+   cmd_buffer->qf = vk_queue_to_radv(device->physical_device, pool->vk.queue_family_index);

-   ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+   ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf);

    cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
    if (!cmd_buffer->cs) {
@@ -530,7 +531,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
    }

    if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
-       cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+       cmd_buffer->qf == RADV_QUEUE_GENERAL) {
       unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
       unsigned fence_offset, eop_bug_offset;
       void *fence_ptr;
@@ -709,7 +710,7 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pip

    va = radv_buffer_get_va(device->trace_bo);

-   ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+   ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf);

    switch (ring) {
    case RING_GFX:
@@ -1752,16 +1753,16 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
    if (!radv_layout_dcc_compressed(
           cmd_buffer->device, image, iview->base_mip, layout, in_render_loop,
-          radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
-                                       cmd_buffer->queue_family_index)) ||
+          radv_image_queue_family_mask(image, cmd_buffer->qf,
+                                       cmd_buffer->qf)) ||
        disable_dcc) {
       cb_color_info &= C_028C70_DCC_ENABLE;
    }

    if (!radv_layout_fmask_compressed(
           cmd_buffer->device, image, layout,
-          radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
-                                       cmd_buffer->queue_family_index))) {
+          radv_image_queue_family_mask(image, cmd_buffer->qf,
+                                       cmd_buffer->qf))) {
       cb_color_info &= C_028C70_COMPRESSION;
    }
@@ -1870,8 +1871,8 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_
    if (!radv_layout_is_htile_compressed(
           cmd_buffer->device, image, layout, in_render_loop,
-          radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
-                                       cmd_buffer->queue_family_index))) {
+          radv_image_queue_family_mask(image, cmd_buffer->qf,
+                                       cmd_buffer->qf))) {
       db_z_info &= C_028040_TILE_SURFACE_ENABLE;
    }
@@ -1911,8 +1912,8 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_
    if (!radv_layout_is_htile_compressed(
           cmd_buffer->device, image, layout, in_render_loop,
-          radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
-                                       cmd_buffer->queue_family_index))) {
+          radv_image_queue_family_mask(image, cmd_buffer->qf,
+                                       cmd_buffer->qf))) {
       db_z_info &= C_028040_TILE_SURFACE_ENABLE;
       db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
    }
@@ -2539,8 +2540,8 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
          if (radv_layout_is_htile_compressed(
                 cmd_buffer->device, iview->image, layout, in_render_loop,
-                radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
-                                             cmd_buffer->queue_family_index))) {
+                radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
+                                             cmd_buffer->qf))) {
             /* Only load the depth/stencil fast clear values when
              * compressed rendering is enabled.
              */
@@ -4972,7 +4973,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer)

    radv_emit_mip_change_flush_default(cmd_buffer);

-   if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
+   if (cmd_buffer->qf != RADV_QUEUE_TRANSFER) {
       if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
          cmd_buffer->state.flush_bits |=
             RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
@@ -5647,7 +5648,7 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
          allow_ib2 = false;
       }

-      if (secondary->queue_family_index == RADV_QUEUE_COMPUTE) {
+      if (secondary->qf == RADV_QUEUE_COMPUTE) {
         /* IB2 packets are not supported on compute queues according to PAL. */
         allow_ib2 = false;
      }
@@ -8291,33 +8292,35 @@ radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct ra
 static void
 radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              VkImageLayout src_layout, bool src_render_loop,
-                             VkImageLayout dst_layout, bool dst_render_loop, uint32_t src_family,
-                             uint32_t dst_family, const VkImageSubresourceRange *range,
+                             VkImageLayout dst_layout, bool dst_render_loop, uint32_t src_family_index,
+                             uint32_t dst_family_index, const VkImageSubresourceRange *range,
                              struct radv_sample_locations_state *sample_locs)
 {
-   if (image->exclusive && src_family != dst_family) {
+   enum radv_queue_family src_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, src_family_index);
+   enum radv_queue_family dst_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, dst_family_index);
+   if (image->exclusive && src_family_index != dst_family_index) {
      /* This is an acquire or a release operation and there will be
       * a corresponding release/acquire. Do the transition in the
       * most flexible queue. */
-      assert(src_family == cmd_buffer->queue_family_index ||
-             dst_family == cmd_buffer->queue_family_index);
+      assert(src_qf == cmd_buffer->qf ||
+             dst_qf == cmd_buffer->qf);

-      if (src_family == VK_QUEUE_FAMILY_EXTERNAL || src_family == VK_QUEUE_FAMILY_FOREIGN_EXT)
+      if (src_family_index == VK_QUEUE_FAMILY_EXTERNAL || src_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
         return;

-      if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER)
+      if (cmd_buffer->qf == RADV_QUEUE_TRANSFER)
         return;

-      if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
-          (src_family == RADV_QUEUE_GENERAL || dst_family == RADV_QUEUE_GENERAL))
+      if (cmd_buffer->qf == RADV_QUEUE_COMPUTE &&
+          (src_qf == RADV_QUEUE_GENERAL || dst_qf == RADV_QUEUE_GENERAL))
         return;
    }

    unsigned src_queue_mask =
-      radv_image_queue_family_mask(image, src_family, cmd_buffer->queue_family_index);
+      radv_image_queue_family_mask(image, src_qf, cmd_buffer->qf);
    unsigned dst_queue_mask =
-      radv_image_queue_family_mask(image, dst_family, cmd_buffer->queue_family_index);
+      radv_image_queue_family_mask(image, dst_qf, cmd_buffer->qf);

    if (src_layout == dst_layout && src_render_loop == dst_render_loop && src_queue_mask == dst_queue_mask)
       return;
@@ -8628,7 +8631,7 @@ radv_CmdBeginConditionalRenderingEXT(
    si_emit_cache_flush(cmd_buffer);

-   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL &&
+   if (cmd_buffer->qf == RADV_QUEUE_GENERAL &&
        !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) {
       uint64_t pred_value = 0, pred_va;
       unsigned pred_offset;
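The hunk at line 356 above cuts off the body of radv_queue_family_to_ring() at its first case label. For orientation, a minimal sketch of the mapping as it reads after this change; the case bodies are assumed from the family and ring names used elsewhere in this patch, the diff itself does not show them:

   enum ring_type
   radv_queue_family_to_ring(struct radv_physical_device *physical_device,
                             enum radv_queue_family f)
   {
      /* physical_device is unused so far; passing it lets the family-to-ring
       * mapping consult hardware info later without another signature change. */
      switch (f) {
      case RADV_QUEUE_GENERAL:
         return RING_GFX;
      case RADV_QUEUE_COMPUTE:
         return RING_COMPUTE;
      case RADV_QUEUE_TRANSFER:
         return RING_DMA; /* assumed: the transfer family maps to the DMA ring */
      default:
         unreachable("Unknown queue family");
      }
   }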
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index 178cf10e288..c1e86867647 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -520,7 +520,7 @@ radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
 static void
 radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
 {
-   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
+   enum ring_type ring = radv_queue_ring(queue);
    struct radv_pipeline *pipeline;

    fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
"GFX" : "COMPUTE"); @@ -631,7 +631,7 @@ radv_dump_device_name(struct radv_device *device, FILE *f) static void radv_dump_umr_ring(struct radv_queue *queue, FILE *f) { - enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); + enum ring_type ring = radv_queue_ring(queue); struct radv_device *device = queue->device; char cmd[128]; @@ -649,7 +649,7 @@ radv_dump_umr_ring(struct radv_queue *queue, FILE *f) static void radv_dump_umr_waves(struct radv_queue *queue, FILE *f) { - enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); + enum ring_type ring = radv_queue_ring(queue); struct radv_device *device = queue->device; char cmd[128]; @@ -682,7 +682,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) enum ring_type ring; uint64_t addr; - ring = radv_queue_family_to_ring(queue->vk.queue_family_index); + ring = radv_queue_ring(queue); bool hang_occurred = radv_gpu_hang_occured(queue, ring); bool vm_fault_occurred = false; @@ -989,7 +989,7 @@ radv_dump_sq_hw_regs(struct radv_device *device) void radv_check_trap_handler(struct radv_queue *queue) { - enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); + enum ring_type ring = radv_queue_ring(queue); struct radv_device *device = queue->device; struct radeon_winsys *ws = device->ws; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 3a8dd394a63..afc22391e6a 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -565,6 +565,20 @@ radv_is_conformant(const struct radv_physical_device *pdevice) return pdevice->rad_info.chip_class >= GFX8; } +static void +radv_physical_device_init_queue_table(struct radv_physical_device *pdevice) +{ + pdevice->vk_queue_to_radv[0] = RADV_QUEUE_GENERAL; + + for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++) + pdevice->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1; + + if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 && + !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) { + pdevice->vk_queue_to_radv[1] = RADV_QUEUE_COMPUTE; + } +} + static VkResult radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device, struct radv_physical_device **device_out) @@ -786,6 +800,8 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm if ((device->instance->debug_flags & RADV_DEBUG_INFO)) ac_print_gpu_info(&device->rad_info, stdout); + radv_physical_device_init_queue_table(device); + /* The WSI is structured as a layer on top of the driver, so this has * to be the last part of initialization (at least until we get other * semi-layers). 
@@ -2652,6 +2668,7 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
    queue->device = device;
    queue->priority = radv_get_queue_global_priority(global_priority);
    queue->hw_ctx = device->hw_ctx[queue->priority];
+   queue->qf = vk_queue_to_radv(device->physical_device, create_info->queueFamilyIndex);

    VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
    if (result != VK_SUCCESS)
@@ -3813,7 +3830,7 @@ radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                            uint32_t size_per_wave, uint32_t waves,
                            struct radeon_winsys_bo *scratch_bo)
 {
-   if (queue->vk.queue_family_index != RADV_QUEUE_GENERAL)
+   if (queue->qf != RADV_QUEUE_GENERAL)
       return;

    if (!scratch_bo)
@@ -3939,7 +3956,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
    unsigned tess_offchip_ring_offset;
    uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
    VkResult result = VK_SUCCESS;
-   if (queue->vk.queue_family_index == RADV_QUEUE_TRANSFER)
+   if (queue->qf == RADV_QUEUE_TRANSFER)
       return VK_SUCCESS;

    if (!queue->has_tess_rings) {
@@ -4121,7 +4138,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
    enum rgp_flush_bits sqtt_flush_bits = 0;
    struct radeon_cmdbuf *cs = NULL;
    cs = queue->device->ws->cs_create(queue->device->ws,
-                                     queue->vk.queue_family_index ? RING_COMPUTE : RING_GFX);
+                                     radv_queue_ring(queue));
    if (!cs) {
       result = VK_ERROR_OUT_OF_HOST_MEMORY;
       goto fail;
@@ -4133,7 +4150,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
       radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);

    /* Emit initial configuration. */
-   switch (queue->vk.queue_family_index) {
+   switch (queue->qf) {
    case RADV_QUEUE_GENERAL:
       radv_init_graphics_state(cs, queue);
       break;
@@ -4141,6 +4158,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
       radv_init_compute_state(cs, queue);
       break;
    case RADV_QUEUE_TRANSFER:
+   default:
       break;
    }

@@ -4168,9 +4186,9 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
       if (i == 0) {
          si_cs_emit_cache_flush(
             cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
-            queue->vk.queue_family_index == RING_COMPUTE &&
+            queue->qf == RADV_QUEUE_COMPUTE &&
                queue->device->physical_device->rad_info.chip_class >= GFX7,
-            (queue->vk.queue_family_index == RADV_QUEUE_COMPUTE
+            (queue->qf == RADV_QUEUE_COMPUTE
                 ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
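With the radv_image.c change below/above in this series, image->queue_family_mask is indexed by radv families rather than raw Vulkan indices. A sketch of how the mask composes for a VK_SHARING_MODE_CONCURRENT image shared between Vulkan families 0 and 1 (a hypothetical illustration mirroring the loop in radv_image_create(); `mask` is a stand-in local):

   uint32_t mask = 0;
   mask |= 1u << vk_queue_to_radv(device->physical_device, 0); /* RADV_QUEUE_GENERAL */
   mask |= 1u << vk_queue_to_radv(device->physical_device, 1); /* RADV_QUEUE_COMPUTE */
   /* VK_QUEUE_FAMILY_EXTERNAL and VK_QUEUE_FAMILY_FOREIGN_EXT still set every
    * real-family bit at once: (1u << RADV_MAX_QUEUE_FAMILIES) - 1u. */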
                 : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
                RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
@@ -4178,7 +4196,7 @@
             &sqtt_flush_bits, 0);
       } else if (i == 1) {
          si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
-                                queue->vk.queue_family_index == RING_COMPUTE &&
+                                queue->qf == RADV_QUEUE_COMPUTE &&
                                    queue->device->physical_device->rad_info.chip_class >= GFX7,
                                 RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
                                    RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
@@ -4484,6 +4502,7 @@ radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
    struct radeon_cmdbuf *initial_preamble_cs = NULL;
    struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
    struct radeon_cmdbuf *continue_preamble_cs = NULL;
+   enum ring_type ring = radv_queue_ring(queue);

    result = radv_get_preambles(queue, submission->command_buffers,
                                submission->command_buffer_count,
@@ -4514,7 +4533,7 @@ radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
       return VK_SUCCESS;

    if (!submission->command_buffer_count) {
-      result = queue->device->ws->cs_submit(ctx, queue->vk.queue_family_index,
+      result = queue->device->ws->cs_submit(ctx, ring,
                                             queue->vk.index_in_family, NULL, 0, NULL, NULL,
                                             submission->wait_count, submission->waits,
                                             submission->signal_count, submission->signals, false);
@@ -4552,7 +4571,7 @@ radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
          *queue->device->trace_id_ptr = 0;

       result = queue->device->ws->cs_submit(
-         ctx, queue->vk.queue_family_index, queue->vk.index_in_family, cs_array + j, advance,
+         ctx, ring, queue->vk.index_in_family, cs_array + j, advance,
          initial_preamble, continue_preamble_cs, j == 0 ? submission->wait_count : 0,
          submission->waits, last_submit ?
 submission->signal_count : 0, submission->signals,
          can_patch);
@@ -4597,7 +4616,7 @@ radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
    struct radeon_winsys_ctx *ctx = queue->hw_ctx;

    VkResult result =
-      queue->device->ws->cs_submit(ctx, queue->vk.queue_family_index, queue->vk.index_in_family,
+      queue->device->ws->cs_submit(ctx, radv_queue_ring(queue), queue->vk.index_in_family,
                                    &cs, 1, NULL, NULL, 0, NULL, 0, NULL, false);
    if (result != VK_SUCCESS)
       return false;
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index c90f016e755..d41d6c927ea 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -1833,7 +1833,8 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
-           image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
+           image->queue_family_mask |= 1u << vk_queue_to_radv(device->physical_device,
+                                                              pCreateInfo->pQueueFamilyIndices[i]);

    const VkExternalMemoryImageCreateInfo *external_info =
@@ -2301,7 +2302,9 @@ radv_layout_fmask_compressed(const struct radv_device *device, const struct radv
 }

 unsigned
-radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
+radv_image_queue_family_mask(const struct radv_image *image,
+                             enum radv_queue_family family,
+                             enum radv_queue_family queue_family)
 {
    if (!image->exclusive)
       return image->queue_family_mask;
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index 5e6ce674d48..c3fd99830b4 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -657,8 +657,8 @@ depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, const struct radv_
    if (!iview)
       return false;

-   uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
-                                                      cmd_buffer->queue_family_index);
+   uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
+                                                      cmd_buffer->qf);
    if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
        clear_rect->rect.extent.width != iview->extent.width ||
        clear_rect->rect.extent.height != iview->extent.height)
@@ -969,8 +969,8 @@ radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_
    if (!radv_layout_is_htile_compressed(
           cmd_buffer->device, iview->image, image_layout, in_render_loop,
-          radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
-                                       cmd_buffer->queue_family_index)))
+          radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
+                                       cmd_buffer->qf)))
       return false;

    if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
@@ -1798,8 +1798,8 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
    if (!radv_layout_can_fast_clear(
           cmd_buffer->device, iview->image, iview->base_mip, image_layout, in_render_loop,
-          radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
-                                       cmd_buffer->queue_family_index)))
+          radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
+                                       cmd_buffer->qf)))
       return false;

    if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
@@ -2360,8 +2360,8 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag
       format = VK_FORMAT_R32_UINT;
       internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);

-      uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
-                                                         cmd_buffer->queue_family_index);
+      uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf,
+                                                         cmd_buffer->qf);

       for (uint32_t r = 0; r < range_count; r++) {
          const VkImageSubresourceRange *range = &ranges[r];
@@ -2439,7 +2439,7 @@ radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageL
    struct radv_meta_saved_state saved_state;
    bool cs;

-   cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+   cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE ||
         !radv_image_is_renderable(cmd_buffer->device, image);

    if (cs) {
diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c
index eb539b40c7f..10ba45080da 100644
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -136,7 +136,7 @@ copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf
     */
    assert(image->info.samples == 1);

-   cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+   cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE ||
         !radv_image_is_renderable(cmd_buffer->device, image);

    radv_meta_save(&saved_state, cmd_buffer,
@@ -178,8 +178,8 @@ copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf
       image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);

    if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) {
-      uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
-                                                         cmd_buffer->queue_family_index);
+      uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf,
+                                                         cmd_buffer->qf);
       bool compressed =
          radv_layout_dcc_compressed(cmd_buffer->device, image, region->imageSubresource.mipLevel,
                                     layout, false, queue_mask);
@@ -278,7 +278,7 @@ copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf
                      struct radv_image *image, VkImageLayout layout,
                      const VkBufferImageCopy2KHR *region)
 {
-   if (cmd_buffer->pool->vk.queue_family_index == RADV_QUEUE_TRANSFER) {
+   if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
       /* RADV_QUEUE_TRANSFER should only be used for the prime blit */
       assert(!region->imageOffset.x && !region->imageOffset.y && !region->imageOffset.z);
       assert(image->type == VK_IMAGE_TYPE_2D);
@@ -333,8 +333,8 @@ copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf
       image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);

    if (!radv_is_buffer_format_supported(img_info.format, NULL)) {
-      uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
-                                                         cmd_buffer->queue_family_index);
+      uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf,
+                                                         cmd_buffer->qf);
       bool compressed =
          radv_layout_dcc_compressed(cmd_buffer->device, image, region->imageSubresource.mipLevel,
                                     layout, false, queue_mask);
@@ -420,7 +420,7 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
     */
    assert(src_image->info.samples == dst_image->info.samples);

-   cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+   cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE ||
         !radv_image_is_renderable(cmd_buffer->device, dst_image);

    radv_meta_save(&saved_state, cmd_buffer,
@@ -437,8 +437,8 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
    /* For partial copies, HTILE should be decompressed before copying because the metadata is
     * re-initialized to the uncompressed state after.
     */
-   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
-                                                      cmd_buffer->queue_family_index);
+   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
+                                                      cmd_buffer->qf);

    if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout, false,
                                        queue_mask) &&
@@ -480,12 +480,12 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
          dst_image, dst_image_layout, &region->dstSubresource, dst_aspects[a]);

       uint32_t dst_queue_mask = radv_image_queue_family_mask(
-         dst_image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index);
+         dst_image, cmd_buffer->qf, cmd_buffer->qf);
       bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image,
                                                        region->dstSubresource.mipLevel,
                                                        dst_image_layout, false, dst_queue_mask);
       uint32_t src_queue_mask = radv_image_queue_family_mask(
-         src_image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index);
+         src_image, cmd_buffer->qf, cmd_buffer->qf);
       bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image,
                                                        region->srcSubresource.mipLevel,
                                                        src_image_layout, false, src_queue_mask);
@@ -581,8 +581,8 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,

    if (cs) {
       /* Fixup HTILE after a copy on compute. */
-      uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
-                                                         cmd_buffer->queue_family_index);
+      uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
+                                                         cmd_buffer->qf);

       if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_image_layout, false,
                                           queue_mask)) {
diff --git a/src/amd/vulkan/radv_meta_decompress.c b/src/amd/vulkan/radv_meta_decompress.c
index 6a5203ecbc8..ca83f907af7 100644
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -727,7 +727,7 @@ radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image
    barrier.layout_transitions.depth_stencil_expand = 1;
    radv_describe_layout_transition(cmd_buffer, &barrier);

-   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+   if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
       radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
    } else {
       radv_expand_depth_stencil_compute(cmd_buffer, image, subresourceRange);
@@ -744,6 +744,6 @@ radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_i
    barrier.layout_transitions.depth_stencil_resummarize = 1;
    radv_describe_layout_transition(cmd_buffer, &barrier);

-   assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+   assert(cmd_buffer->qf == RADV_QUEUE_GENERAL);
    radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
 }
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index 4fd2079a606..5089cb339e7 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -920,7 +920,7 @@ radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image
    barrier.layout_transitions.dcc_decompress = 1;
    radv_describe_layout_transition(cmd_buffer, &barrier);

-   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
+   if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
       radv_process_color_image(cmd_buffer, image, subresourceRange, DCC_DECOMPRESS);
    else
       radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c
index 2c4a8952df2..9797d16587c 100644
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -375,8 +375,8 @@ radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *s
                                 enum radv_resolve_method *method)
 {
-   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
-                                                      cmd_buffer->queue_family_index);
+   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->qf,
+                                                      cmd_buffer->qf);

    if (vk_format_is_color(src_format)) {
       /* Using the fragment resolve path is currently a hint to
@@ -490,8 +490,8 @@ radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv
    const struct VkOffset3D dstOffset = radv_sanitize_image_offset(dst_image->type, region->dstOffset);

-   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
-                                                      cmd_buffer->queue_family_index);
+   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
+                                                      cmd_buffer->qf);

    if (radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel,
                                   dst_image_layout, false, queue_mask)) {
@@ -693,8 +693,8 @@ radv_cmd_buffer_resolve_subpass_hw(struct radv_cmd_buffer *cmd_buffer)
       struct radv_image *dst_img = dest_iview->image;
       VkImageLayout dst_image_layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;

-      uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->queue_family_index,
-                                                         cmd_buffer->queue_family_index);
+      uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->qf,
+                                                         cmd_buffer->qf);

       if (radv_layout_dcc_compressed(cmd_buffer->device, dst_img, dest_iview->base_mip,
                                      dst_image_layout, false, queue_mask)) {
diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index a3da78a3355..3c0f9dec9d5 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -666,8 +666,8 @@ radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_
    /* For partial resolves, DCC should be decompressed before resolving
     * because the metadata is re-initialized to the uncompressed after.
     */
-   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
-                                                      cmd_buffer->queue_family_index);
+   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->qf,
+                                                      cmd_buffer->qf);

    if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
@@ -921,8 +921,8 @@ radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
       radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT_KHR, NULL);

    VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
-   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
-                                                      cmd_buffer->queue_family_index);
+   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
+                                                      cmd_buffer->qf);

    if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
       VkImageSubresourceRange range = {0};
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 22c202fab63..1bcc590d24e 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -251,6 +251,15 @@ const char *radv_get_instance_entry_name(int index);
 const char *radv_get_physical_device_entry_name(int index);
 const char *radv_get_device_entry_name(int index);

+/* queue types */
+enum radv_queue_family {
+   RADV_QUEUE_GENERAL,
+   RADV_QUEUE_COMPUTE,
+   RADV_QUEUE_TRANSFER,
+   RADV_MAX_QUEUE_FAMILIES,
+   RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
+};
+
 struct radv_physical_device {
    struct vk_physical_device vk;
@@ -315,6 +324,8 @@ struct radv_physical_device {
 #endif

    nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];
+
+   enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
 };

 struct radv_instance {
@@ -681,21 +692,20 @@ struct radv_meta_state {
    } etc_decode;
 };

-/* queue types */
-#define RADV_QUEUE_GENERAL 0
-#define RADV_QUEUE_COMPUTE 1
-#define RADV_QUEUE_TRANSFER 2
-
-/* Not a real queue family */
-#define RADV_QUEUE_FOREIGN 3
-
-#define RADV_MAX_QUEUE_FAMILIES 3
-
 #define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)

 struct radv_deferred_queue_submission;

-enum ring_type radv_queue_family_to_ring(int f);
+static inline enum radv_queue_family
+vk_queue_to_radv(struct radv_physical_device *phys_dev,
+                 int queue_family_index)
+{
+   assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
+   return phys_dev->vk_queue_to_radv[queue_family_index];
+}
+
+enum ring_type radv_queue_family_to_ring(struct radv_physical_device *physical_device,
+                                         enum radv_queue_family f);

 struct radv_queue {
    struct vk_queue vk;
@@ -703,6 +713,7 @@ struct radv_queue {

    struct radeon_winsys_ctx *hw_ctx;
    enum radeon_ctx_priority priority;
+   enum radv_queue_family qf;
    uint32_t scratch_size_per_wave;
    uint32_t scratch_waves;
    uint32_t compute_scratch_size_per_wave;
@@ -1522,7 +1533,7 @@ struct radv_cmd_buffer {
    struct radv_cmd_state state;
    struct radv_vertex_binding vertex_bindings[MAX_VBS];
    struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
-   uint32_t queue_family_index;
+   enum radv_queue_family qf;

    uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
    VkShaderStageFlags push_constant_stages;
@@ -2300,8 +2311,9 @@ radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
           image->info.samples > 1;
 }

-unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family,
-                                      uint32_t queue_family);
+unsigned
+radv_image_queue_family_mask(const struct radv_image *image,
+                             enum radv_queue_family family,
+                             enum radv_queue_family queue_family);

 static inline uint32_t
 radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
@@ -2856,6 +2868,16 @@ si_translate_blend_logic_op(VkLogicOp op)
    }
 }

+/*
+ * Queue helper to get ring.
+ * placed here as it needs queue + device structs.
+ */
+static inline enum ring_type
+radv_queue_ring(struct radv_queue *queue)
+{
+   return radv_queue_family_to_ring(queue->device->physical_device, queue->qf);
+}
+
 /**
  * Helper used for debugging compiler issues by enabling/disabling LLVM for a
  * specific shader stage (developers only).
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index 24fd99982d7..34e7c45576d 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -59,7 +59,7 @@ gfx10_get_thread_trace_ctrl(struct radv_device *device, bool enable)

 static void
 radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *cs,
-                             uint32_t queue_family_index)
+                             enum radv_queue_family qf)
 {
    uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
    struct radeon_info *rad_info = &device->physical_device->rad_info;
@@ -177,7 +177,7 @@ radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *c
                           S_030800_INSTANCE_BROADCAST_WRITES(1));

    /* Start the thread trace with a different event based on the queue. */
-   if (queue_family_index == RADV_QUEUE_COMPUTE) {
+   if (qf == RADV_QUEUE_COMPUTE) {
       radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(1));
    } else {
       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -236,12 +236,12 @@ radv_copy_thread_trace_info_regs(struct radv_device *device, struct radeon_cmdbu

 static void
 radv_emit_thread_trace_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
-                            uint32_t queue_family_index)
+                            enum radv_queue_family qf)
 {
    unsigned max_se = device->physical_device->rad_info.max_se;

    /* Stop the thread trace with a different event based on the queue. */
-   if (queue_family_index == RADV_QUEUE_COMPUTE) {
+   if (qf == RADV_QUEUE_COMPUTE) {
       radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(0));
    } else {
       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -530,7 +530,7 @@ bool
 radv_begin_thread_trace(struct radv_queue *queue)
 {
    struct radv_device *device = queue->device;
-   int family = queue->vk.queue_family_index;
+   enum radv_queue_family family = queue->qf;
    struct radeon_winsys *ws = device->ws;
    struct radeon_cmdbuf *cs;
    VkResult result;
@@ -541,7 +541,7 @@ radv_begin_thread_trace(struct radv_queue *queue)
       device->thread_trace.start_cs[family] = NULL;
    }

-   cs = ws->cs_create(ws, family);
+   cs = ws->cs_create(ws, radv_queue_ring(queue));
    if (!cs)
       return false;
@@ -555,6 +555,9 @@ radv_begin_thread_trace(struct radv_queue *queue)
       radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
       radeon_emit(cs, 0);
       break;
+   default:
+      unreachable("Incorrect queue family");
+      break;
    }

    /* Make sure to wait-for-idle before starting SQTT. */
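The radv_private.h hunks above carry the whole scheme: radv_queue_family is now a proper enum (with RADV_QUEUE_FOREIGN aliasing RADV_MAX_QUEUE_FAMILIES rather than being a fourth real family), the per-device vk_queue_to_radv[] table backs the inline translator, and radv_queue_ring() is the shorthand for queue objects. A hedged usage sketch; fam_idx is a hypothetical Vulkan queue family index taken from the API:

   /* Explicit two-step translation: vk index -> radv family -> HW ring. */
   enum radv_queue_family qf = vk_queue_to_radv(device->physical_device, fam_idx);
   enum ring_type ring = radv_queue_family_to_ring(device->physical_device, qf);

   /* Equivalent shorthand once a radv_queue exists: */
   enum ring_type ring2 = radv_queue_ring(queue);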
@@ -596,7 +599,7 @@ bool
 radv_end_thread_trace(struct radv_queue *queue)
 {
    struct radv_device *device = queue->device;
-   int family = queue->vk.queue_family_index;
+   enum radv_queue_family family = queue->qf;
    struct radeon_winsys *ws = device->ws;
    struct radeon_cmdbuf *cs;
    VkResult result;
@@ -607,7 +610,7 @@ radv_end_thread_trace(struct radv_queue *queue)
      device->thread_trace.stop_cs[family] = NULL;
    }

-   cs = ws->cs_create(ws, family);
+   cs = ws->cs_create(ws, radv_queue_ring(queue));
    if (!cs)
       return false;
@@ -621,6 +624,9 @@ radv_end_thread_trace(struct radv_queue *queue)
       radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
       radeon_emit(cs, 0);
       break;
+   default:
+      unreachable("Incorrect queue family");
+      break;
    }

    /* Make sure to wait-for-idle before stopping SQTT. */
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 4e1856eb60d..20a1b3acaea 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1354,7 +1354,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
 void
 si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 {
-   bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+   bool is_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE;

    if (is_compute)
       cmd_buffer->state.flush_bits &=
@@ -1522,7 +1522,7 @@ si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src
     * should precede it.
     */
    if (flags & CP_DMA_SYNC) {
-      if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+      if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
         radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
         radeon_emit(cs, 0);
      }
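Taken together, the patch removes a whole class of silent type confusion: previously a Vulkan queue family index could be compared against a ring type or handed straight to the winsys, which only worked because the three numberings happened to line up. A before/after sketch drawn from the radv_get_preamble_cs hunks above:

   /* Before: a vk queue family index used as both a ring type and a radv
    * family; correct only by the numeric coincidence that GENERAL/COMPUTE
    * shared the values of RING_GFX/RING_COMPUTE. */
   if (queue->vk.queue_family_index == RING_COMPUTE) { /* type confusion */ }
   cs = ws->cs_create(ws, queue->vk.queue_family_index ? RING_COMPUTE : RING_GFX);

   /* After: each conversion is explicit and carries the right enum type. */
   if (queue->qf == RADV_QUEUE_COMPUTE) { /* clear intent */ }
   cs = ws->cs_create(ws, radv_queue_ring(queue));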