diff --git a/src/imagination/rogue/rogue.h b/src/imagination/rogue/rogue.h index de59bb33a28..9cb665de5dd 100644 --- a/src/imagination/rogue/rogue.h +++ b/src/imagination/rogue/rogue.h @@ -2879,6 +2879,9 @@ typedef struct rogue_build_data { bool phas; /* Indicates the presence of PHAS instruction. */ } fs; struct rogue_vs_build_data { + /* TODO: Should these be removed since the driver allocates the vertex + * inputs? + */ rogue_vertex_inputs inputs; unsigned num_vertex_input_regs; /* Final number of inputs. */ diff --git a/src/imagination/vulkan/pds/pvr_pds.h b/src/imagination/vulkan/pds/pvr_pds.h index 00c424a54c5..e5793ba4ed9 100644 --- a/src/imagination/vulkan/pds/pvr_pds.h +++ b/src/imagination/vulkan/pds/pvr_pds.h @@ -978,7 +978,7 @@ struct pvr_pds_vertex_primary_program_input { /* Control for the DOUTU that kicks the vertex USC shader. */ struct pvr_pds_usc_task_control usc_task_control; /* List of DMAs (of size dma_count). */ - struct pvr_pds_vertex_dma *dma_list; + const struct pvr_pds_vertex_dma *dma_list; uint32_t dma_count; /* ORd bitfield of PVR_PDS_VERTEX_FLAGS_* */ diff --git a/src/imagination/vulkan/pds/pvr_pipeline_pds.c b/src/imagination/vulkan/pds/pvr_pipeline_pds.c index dbd18d6720e..226b1f95bae 100644 --- a/src/imagination/vulkan/pds/pvr_pipeline_pds.c +++ b/src/imagination/vulkan/pds/pvr_pipeline_pds.c @@ -744,7 +744,8 @@ void pvr_pds_generate_vertex_primary_program( uint32_t control_word; struct pvr_const_map_entry_literal32 *literal_entry; - struct pvr_pds_vertex_dma *vertex_dma = &input_program->dma_list[dma]; + const struct pvr_pds_vertex_dma *vertex_dma = + &input_program->dma_list[dma]; bool last_dma = (++running_dma_count == total_dma_count); pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_dma); diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index d0861a6e64a..59c97c60f56 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ 
-3044,6 +3044,15 @@ pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer, (struct pvr_const_map_entry_vertex_attribute_address *)entries; const struct pvr_vertex_binding *const binding = &state->vertex_bindings[attribute->binding_index]; + /* In relation to the Vulkan spec. 22.4. Vertex Input Address + * Calculation: + * Adding binding->offset corresponds to calculating the + * `bufferBindingAddress`. Adding attribute->offset corresponds to + * adding the `attribDesc.offset`. The `effectiveVertexOffset` is + * taken care of by the PDS program itself with a DDMAD which will + * multiply the vertex/instance idx with the binding's stride and + * add that to the address provided here. + */ const pvr_dev_addr_t addr = PVR_DEV_ADDR_OFFSET(binding->buffer->dev_addr, binding->offset + attribute->offset); diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index bb4e98f198c..cebcc9272b5 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -49,6 +49,7 @@ #include "util/u_dynarray.h" #include "util/u_math.h" #include "vk_alloc.h" +#include "vk_format.h" #include "vk_graphics_state.h" #include "vk_log.h" #include "vk_object.h" @@ -461,26 +462,41 @@ typedef struct pvr_pds_attrib_program (*const pvr_pds_attrib_programs_array_ptr) /* If allocator == NULL, the internal one will be used. * * programs_out_ptr is a pointer to the array where the outputs will be placed. - * */ + */ static VkResult pvr_pds_vertex_attrib_programs_create_and_upload( struct pvr_device *device, const VkAllocationCallbacks *const allocator, const VkPipelineVertexInputStateCreateInfo *const vertex_input_state, uint32_t usc_temp_count, const struct rogue_vs_build_data *vs_data, + + /* Needed for the new path. */ + /* TODO: Remove some of the above once the compiler is hooked up. 
*/ + const struct pvr_pds_vertex_dma + dma_descriptions[static const PVR_MAX_VERTEX_ATTRIB_DMAS], + uint32_t dma_count, + pvr_pds_attrib_programs_array_ptr programs_out_ptr) { - struct pvr_pds_vertex_dma dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS]; + struct pvr_pds_vertex_dma dma_descriptions_old[PVR_MAX_VERTEX_ATTRIB_DMAS]; + struct pvr_pds_attrib_program *const programs_out = *programs_out_ptr; - struct pvr_pds_vertex_primary_program_input input = { - .dma_list = dma_descriptions, - }; + struct pvr_pds_vertex_primary_program_input input = { 0 }; VkResult result; - pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state, - vs_data, - &dma_descriptions, - &input.dma_count); + const bool old_path = pvr_has_hard_coded_shaders(&device->pdevice->dev_info); + + if (old_path) { + pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state, + vs_data, + &dma_descriptions_old, + &input.dma_count); + + input.dma_list = dma_descriptions_old; + } else { + input.dma_list = dma_descriptions; + input.dma_count = dma_count; + } pvr_pds_setup_doutu(&input.usc_task_control, 0, @@ -1446,6 +1462,7 @@ pvr_graphics_pipeline_destroy(struct pvr_device *const device, static void pvr_vertex_state_init(struct pvr_graphics_pipeline *gfx_pipeline, const struct rogue_common_build_data *common_data, + uint32_t vtxin_regs_used, const struct rogue_vs_build_data *vs_data) { struct pvr_vertex_shader_state *vertex_state = @@ -1464,7 +1481,7 @@ pvr_vertex_state_init(struct pvr_graphics_pipeline *gfx_pipeline, vertex_state->stage_state.has_side_effects = false; vertex_state->stage_state.empty_program = false; - vertex_state->vertex_input_size = vs_data->num_vertex_input_regs; + vertex_state->vertex_input_size = vtxin_regs_used; vertex_state->vertex_output_size = vs_data->num_vertex_outputs * ROGUE_REG_SIZE_BYTES; vertex_state->user_clip_planes_mask = 0; @@ -1628,6 +1645,174 @@ static uint32_t pvr_graphics_pipeline_alloc_shareds( #undef PVR_DEV_ADDR_SIZE_IN_SH_REGS +static void 
+ pvr_graphics_pipeline_alloc_vertex_inputs( + const VkPipelineVertexInputStateCreateInfo *const vs_data, + rogue_vertex_inputs *const vertex_input_layout_out, + unsigned *num_vertex_input_regs_out, + pvr_pds_attrib_dma_descriptions_array_ptr dma_descriptions_out_ptr, + uint32_t *const dma_count_out) +{ + const VkVertexInputBindingDescription + *sorted_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 }; + const VkVertexInputAttributeDescription + *sorted_attributes[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 }; + + rogue_vertex_inputs build_data = { + .num_input_vars = vs_data->vertexAttributeDescriptionCount, + }; + uint32_t next_reg_offset = 0; + + struct pvr_pds_vertex_dma *const dma_descriptions = + *dma_descriptions_out_ptr; + uint32_t dma_count = 0; + + /* Vertex attributes map to the `layout(location = x)` annotation in the + * shader where `x` is the attribute's location. + * Vertex bindings have NO relation to the shader. They have nothing to do + * with the `layout(set = x, binding = y)` notation. They instead indicate + * where the data for a collection of vertex attributes comes from. The + * application binds a VkBuffer with vkCmdBindVertexBuffers() to a specific + * binding number and based on that we'll know which buffer to DMA the data + * from, to fill in the collection of vertex attributes. 
+ */ + + for (uint32_t i = 0; i < vs_data->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *binding_desc = + &vs_data->pVertexBindingDescriptions[i]; + + sorted_bindings[binding_desc->binding] = binding_desc; + } + + for (uint32_t i = 0; i < vs_data->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *attribute_desc = + &vs_data->pVertexAttributeDescriptions[i]; + + sorted_attributes[attribute_desc->location] = attribute_desc; + } + + for (uint32_t i = 0, j = 0; i < ARRAY_SIZE(sorted_attributes); i++) { + if (sorted_attributes[i]) + sorted_attributes[j++] = sorted_attributes[i]; + } + + for (uint32_t i = 0; i < vs_data->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *attribute = sorted_attributes[i]; + const VkVertexInputBindingDescription *binding = + sorted_bindings[attribute->binding]; + const struct util_format_description *fmt_description = + vk_format_description(attribute->format); + struct pvr_pds_vertex_dma *dma_desc = &dma_descriptions[dma_count]; + unsigned vtxin_reg_offset; + + /* Reg allocation. */ + + vtxin_reg_offset = next_reg_offset; + build_data.base[i] = vtxin_reg_offset; + + if (fmt_description->colorspace != UTIL_FORMAT_COLORSPACE_RGB || + fmt_description->layout != UTIL_FORMAT_LAYOUT_PLAIN || + fmt_description->block.bits % 32 != 0 || !fmt_description->is_array) { + /* For now we only support formats with 32 bit components since we + * don't need to pack/unpack them. + */ + /* TODO: Support any other format with VERTEX_BUFFER_BIT set that + * doesn't have 32 bit components if we're advertising any. + */ + assert(false); + } + + /* TODO: Check if this is fine with the compiler. Does it want the amount + * of components or does it want a size in dwords to figure out how many + * vtxin regs are covered. For formats with 32 bit components the + * distinction doesn't change anything. 
+ */ + build_data.components[i] = + util_format_get_nr_components(fmt_description->format); + + next_reg_offset += build_data.components[i]; + + /* DMA setup. */ + + /* The PDS program sets up DDMADs to DMA attributes into vtxin regs. + * + * DDMAD -> Multiply, add, and DOUTD (i.e. DMA from that address). + * DMA source addr = src0 * src1 + src2 + * DMA params = src3 + * + * In the PDS program we setup src0 with the binding's stride and src1 + * with either the instance id or vertex id (both of which get filled by + * the hardware). We setup src2 later on once we know which VkBuffer to + * DMA the data from so it's saved for later when we patch the data + * section. + */ + + /* TODO: Right now we're setting up a DMA per attribute. In a case where + * there are multiple attributes packed into a single binding with + * adjacent locations we'd still be DMAing them separately. This is not + * great so the DMA setup should be smarter and could do with some + * optimization. + */ + + *dma_desc = (struct pvr_pds_vertex_dma){ 0 }; + + /* In relation to the Vulkan spec. 22.4. Vertex Input Address Calculation + * this corresponds to `attribDesc.offset`. + * The PDS program doesn't do anything with it but just saves it in the + * PDS program entry. + */ + dma_desc->offset = attribute->offset; + + /* In relation to the Vulkan spec. 22.4. Vertex Input Address Calculation + * this corresponds to `bindingDesc.stride`. + * The PDS program will calculate the `effectiveVertexOffset` with this + * and add it to the address provided in the patched data segment. + */ + dma_desc->stride = binding->stride; + + if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) + dma_desc->flags = PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE; + else + dma_desc->flags = 0; + + /* Size to DMA per vertex attribute. Used to setup src3 in the DDMAD. */ + assert(fmt_description->block.bits != 0); /* Likely an unsupported fmt. 
*/ + dma_desc->size_in_dwords = fmt_description->block.bits / 32; + + /* Vtxin reg offset to start DMAing into. */ + dma_desc->destination = vtxin_reg_offset; + + /* Will be used by the driver to figure out buffer address to patch in the + * data section. I.e. which binding we should DMA from. + */ + dma_desc->binding_index = attribute->binding; + + /* We don't currently support VK_EXT_vertex_attribute_divisor so no + * repeating of instance-rate vertex attributes needed. We should always + * move on to the next vertex attribute. + */ + dma_desc->divisor = 1; + + /* Will be used to generate PDS code that takes care of robust buffer + * access, and later on by the driver to write the correct robustness + * buffer address to DMA the fallback values from. + */ + dma_desc->robustness_buffer_offset = + pvr_get_robustness_buffer_format_offset(attribute->format); + + /* Used later on by the driver to figure out if the buffer is being + * accessed out of bounds, for robust buffer access. + */ + dma_desc->component_size_in_bytes = + fmt_description->block.bits / fmt_description->nr_channels / 8; + + dma_count++; + }; + + *vertex_input_layout_out = build_data; + *num_vertex_input_regs_out = next_reg_offset; + *dma_count_out = dma_count; +} + /* Compiles and uploads shaders and PDS programs. */ static VkResult pvr_graphics_pipeline_compile(struct pvr_device *const device, @@ -1661,9 +1846,22 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, const bool old_path = pvr_has_hard_coded_shaders(&device->pdevice->dev_info); /* Vars needed for the new path. */ + struct pvr_pds_vertex_dma vtx_dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS]; + uint32_t vtx_dma_count = 0; + /* TODO: This should be used by the compiler for compiling the vertex shader. 
+ */ + rogue_vertex_inputs vertex_input_layout; + unsigned vertex_input_reg_count = 0; + uint32_t sh_count[PVR_STAGE_ALLOCATION_COUNT] = { 0 }; - if (!old_path) + if (!old_path) { + pvr_graphics_pipeline_alloc_vertex_inputs(vertex_input_state, + &vertex_input_layout, + &vertex_input_reg_count, + &vtx_dma_descriptions, + &vtx_dma_count); + for (enum pvr_stage_allocation pvr_stage = PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY; pvr_stage < PVR_STAGE_ALLOCATION_COMPUTE; @@ -1673,6 +1871,7 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, layout, pvr_stage, &layout->sh_reg_layout_per_stage[pvr_stage]); + } /* Setup shared build context. */ ctx = rogue_build_context_create(compiler, layout); @@ -1775,6 +1974,7 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, } else { pvr_vertex_state_init(gfx_pipeline, &ctx->common_data[MESA_SHADER_VERTEX], + vertex_input_reg_count, &ctx->stage_data.vs); if (!old_path) { @@ -1787,6 +1987,9 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, */ vertex_state->stage_state.const_shared_reg_count = sh_count[PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY]; + + gfx_pipeline->shader_state.vertex.vertex_input_size = + ctx->stage_data.vs.num_vertex_input_regs; } } @@ -1864,6 +2067,8 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device, vertex_input_state, ctx->common_data[MESA_SHADER_VERTEX].temps, &ctx->stage_data.vs, + vtx_dma_descriptions, + vtx_dma_count, &gfx_pipeline->shader_state.vertex.pds_attrib_programs); if (result != VK_SUCCESS) goto err_free_frag_program;