diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index b6eea06aa07..52d15a4984d 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -489,7 +489,6 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state) uint32_t enable_mask; switch (id) { case TU_DRAW_STATE_PROGRAM: - case TU_DRAW_STATE_VI: /* The blob seems to not enable this (DESC_SETS_LOAD) for binning, even * when resources would actually be used in the binning shader. * Presumably the overhead of prefetching the resources isn't @@ -500,7 +499,6 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state) CP_SET_DRAW_STATE__0_SYSMEM; break; case TU_DRAW_STATE_PROGRAM_BINNING: - case TU_DRAW_STATE_VI_BINNING: enable_mask = CP_SET_DRAW_STATE__0_BINNING; break; case TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM: @@ -2440,12 +2438,11 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) { uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT); - tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (8 + util_bitcount(mask))); + tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (7 + util_bitcount(mask))); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_CONFIG, pipeline->program.config_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state); - tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_SYSMEM, pipeline->prim_order_state_sysmem); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_GMEM, pipeline->prim_order_state_gmem); @@ -4482,7 +4479,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM, pipeline->program.state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PROGRAM_BINNING, pipeline->program.binning_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI, pipeline->vi.state); - tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VI_BINNING, pipeline->vi.binning_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_RAST, pipeline->rast_state); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_SYSMEM, pipeline->prim_order_state_sysmem); tu_cs_emit_draw_state(cs, TU_DRAW_STATE_PRIM_MODE_GMEM, pipeline->prim_order_state_gmem); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index d37d007b079..19e842729b0 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -26,7 +26,6 @@ enum tu_draw_state_group_id TU_DRAW_STATE_PROGRAM_BINNING, TU_DRAW_STATE_VB, TU_DRAW_STATE_VI, - TU_DRAW_STATE_VI_BINNING, TU_DRAW_STATE_RAST, TU_DRAW_STATE_CONST, TU_DRAW_STATE_DESC_SETS, diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index e436d037fec..d3f77060b56 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -714,6 +714,50 @@ tu6_emit_cs_config(struct tu_cs *cs, } } +#define TU6_EMIT_VFD_DEST_MAX_DWORDS (MAX_VERTEX_ATTRIBS + 2) + +static void +tu6_emit_vfd_dest(struct tu_cs *cs, + const struct ir3_shader_variant *vs) +{ + int32_t input_for_attr[MAX_VERTEX_ATTRIBS]; + uint32_t attr_count = 0; + + for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) + input_for_attr[i] = -1; + + for (unsigned i = 0; i < vs->inputs_count; i++) { + if (vs->inputs[i].sysval || vs->inputs[i].regid == regid(63, 0)) + continue; + + assert(vs->inputs[i].slot >= VERT_ATTRIB_GENERIC0); + unsigned loc = vs->inputs[i].slot - VERT_ATTRIB_GENERIC0; + input_for_attr[loc] = i; + attr_count = MAX2(attr_count, loc + 1); + } + + tu_cs_emit_regs(cs, + A6XX_VFD_CONTROL_0( + .fetch_cnt = attr_count, /* decode_cnt for binning pass ? */ + .decode_cnt = attr_count)); + + if (attr_count) + tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL_INSTR(0), attr_count); + + for (unsigned i = 0; i < attr_count; i++) { + if (input_for_attr[i] >= 0) { + unsigned input_idx = input_for_attr[i]; + tu_cs_emit(cs, A6XX_VFD_DEST_CNTL_INSTR(0, + .writemask = vs->inputs[input_idx].compmask, + .regid = vs->inputs[input_idx].regid).value); + } else { + tu_cs_emit(cs, A6XX_VFD_DEST_CNTL_INSTR(0, + .writemask = 0, + .regid = regid(63, 0)).value); + } + } +} + static void tu6_emit_vs_system_values(struct tu_cs *cs, const struct ir3_shader_variant *vs, @@ -1763,6 +1807,8 @@ tu6_emit_program(struct tu_cs *cs, tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1); tu_cs_emit(cs, 0); + tu6_emit_vfd_dest(cs, vs); + tu6_emit_vpc(cs, vs, hs, ds, gs, fs, cps_per_patch); tu6_emit_vpc_varying_modes(cs, fs); @@ -1805,12 +1851,11 @@ tu6_emit_program(struct tu_cs *cs, } } -#define TU6_EMIT_VERTEX_INPUT_MAX_DWORDS (MAX_VERTEX_ATTRIBS * 5 + 4) +#define TU6_EMIT_VERTEX_INPUT_MAX_DWORDS (MAX_VERTEX_ATTRIBS * 2 + 1) static void tu6_emit_vertex_input(struct tu_pipeline *pipeline, struct tu_draw_state *vi_state, - const struct ir3_shader_variant *vs, const VkPipelineVertexInputStateCreateInfo *info) { uint32_t binding_instanced = 0; /* bitmask of instanced bindings */ @@ -1845,61 +1890,38 @@ tu6_emit_vertex_input(struct tu_pipeline *pipeline, } } - int32_t input_for_attr[MAX_VERTEX_ATTRIBS]; - uint32_t used_attrs_count = 0; + const VkVertexInputAttributeDescription *attrs[MAX_VERTEX_ATTRIBS] = { }; + unsigned attr_count = 0; + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *attr = + &info->pVertexAttributeDescriptions[i]; + attrs[attr->location] = attr; + attr_count = MAX2(attr_count, attr->location + 1); + } - for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) { - input_for_attr[attr_idx] = -1; - for (uint32_t input_idx = 0; input_idx < vs->inputs_count; input_idx++) { - if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == - info->pVertexAttributeDescriptions[attr_idx].location) { - input_for_attr[attr_idx] = input_idx; - used_attrs_count++; - break; - } + if (attr_count != 0) + tu_cs_emit_pkt4(&cs, REG_A6XX_VFD_DECODE_INSTR(0), attr_count * 2); + + for (uint32_t loc = 0; loc < attr_count; loc++) { + const VkVertexInputAttributeDescription *attr = attrs[loc]; + + if (attr) { + const struct tu_native_format format = tu6_format_vtx(attr->format); + tu_cs_emit(&cs, A6XX_VFD_DECODE_INSTR(0, + .idx = attr->binding, + .offset = attr->offset, + .instanced = binding_instanced & (1 << attr->binding), + .format = format.fmt, + .swap = format.swap, + .unk30 = 1, + ._float = !vk_format_is_int(attr->format)).value); + tu_cs_emit(&cs, A6XX_VFD_DECODE_STEP_RATE(0, step_rate[attr->binding]).value); + } else { + tu_cs_emit(&cs, 0); + tu_cs_emit(&cs, 0); } } - if (used_attrs_count) - tu_cs_emit_pkt4(&cs, REG_A6XX_VFD_DECODE_INSTR(0), used_attrs_count * 2); - - for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) { - const VkVertexInputAttributeDescription *attr = - &info->pVertexAttributeDescriptions[attr_idx]; - - if (input_for_attr[attr_idx] == -1) - continue; - - const struct tu_native_format format = tu6_format_vtx(attr->format); - tu_cs_emit(&cs, A6XX_VFD_DECODE_INSTR(0, - .idx = attr->binding, - .offset = attr->offset, - .instanced = binding_instanced & (1 << attr->binding), - .format = format.fmt, - .swap = format.swap, - .unk30 = 1, - ._float = !vk_format_is_int(attr->format)).value); - tu_cs_emit(&cs, A6XX_VFD_DECODE_STEP_RATE(0, step_rate[attr->binding]).value); - } - - if (used_attrs_count) - tu_cs_emit_pkt4(&cs, REG_A6XX_VFD_DEST_CNTL_INSTR(0), used_attrs_count); - - for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) { - int32_t input_idx = input_for_attr[attr_idx]; - if (input_idx == -1) - continue; - - tu_cs_emit(&cs, A6XX_VFD_DEST_CNTL_INSTR(0, - .writemask = vs->inputs[input_idx].compmask, - .regid = vs->inputs[input_idx].regid).value); - } - - tu_cs_emit_regs(&cs, - A6XX_VFD_CONTROL_0( - .fetch_cnt = used_attrs_count, /* decode_cnt for binning pass ? */ - .decode_cnt = used_attrs_count)); - *vi_state = tu_cs_end_draw_state(&pipeline->cs, &cs); } @@ -2336,7 +2358,8 @@ tu_pipeline_allocate_cs(struct tu_device *dev, /* graphics case: */ if (builder) { - size += 2 * TU6_EMIT_VERTEX_INPUT_MAX_DWORDS; + size += TU6_EMIT_VERTEX_INPUT_MAX_DWORDS + + 2 * TU6_EMIT_VFD_DEST_MAX_DWORDS; for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { if (builder->shaders->variants[i]) { @@ -3257,8 +3280,6 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, { const VkPipelineVertexInputStateCreateInfo *vi_info = builder->create_info->pVertexInputState; - const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX]; - const struct ir3_shader_variant *bs = builder->binning_variant; /* Bindings may contain holes */ for (unsigned i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { @@ -3266,9 +3287,7 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, MAX2(pipeline->num_vbs, vi_info->pVertexBindingDescriptions[i].binding + 1); } - tu6_emit_vertex_input(pipeline, &pipeline->vi.state, vs, vi_info); - if (bs) - tu6_emit_vertex_input(pipeline, &pipeline->vi.binning_state, bs, vi_info); + tu6_emit_vertex_input(pipeline, &pipeline->vi.state, vi_info); } static void diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 4bc758cd16c..6b074b4b71b 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -167,7 +167,6 @@ struct tu_pipeline struct { struct tu_draw_state state; - struct tu_draw_state binning_state; } vi; struct