From 9586cacdcb3491f80602bf6b76f6ab9867557f04 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Tue, 10 Aug 2021 03:32:15 +0000 Subject: [PATCH] tu: implement dynamic rasterizer discard enable The state which could be omitted with rasterization discard enabled is unconditionally emitted when discard is a dynamic state. This is not optimal, but it does not introduce much complexity. Signed-off-by: Danylo Piliaiev Reviewed-by: Hyunjun Ko Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.c | 24 ++++++++++++ src/freedreno/vulkan/tu_pipeline.c | 56 +++++++++++++++++++++------- src/freedreno/vulkan/tu_private.h | 7 +++- 3 files changed, 72 insertions(+), 15 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index ab01f308891..34cf61e8876 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -2150,6 +2150,8 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, UPDATE_REG(gras_su_cntl, GRAS_SU_CNTL); UPDATE_REG(rb_depth_cntl, RB_DEPTH_CNTL); UPDATE_REG(rb_stencil_cntl, RB_STENCIL_CNTL); + UPDATE_REG(pc_raster_cntl, RASTERIZER_DISCARD); + UPDATE_REG(vpc_unknown_9107, RASTERIZER_DISCARD); #undef UPDATE_REG if (pipeline->rb_depth_cntl_disable) @@ -2491,6 +2493,22 @@ tu_CmdSetPrimitiveRestartEnableEXT(VkCommandBuffer commandBuffer, cmd->state.primitive_restart_enable = primitiveRestartEnable; } +void +tu_CmdSetRasterizerDiscardEnableEXT(VkCommandBuffer commandBuffer, + VkBool32 rasterizerDiscardEnable) +{ + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + + cmd->state.pc_raster_cntl &= ~A6XX_PC_RASTER_CNTL_DISCARD; + cmd->state.vpc_unknown_9107 &= ~A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD; + if (rasterizerDiscardEnable) { + cmd->state.pc_raster_cntl |= A6XX_PC_RASTER_CNTL_DISCARD; + cmd->state.vpc_unknown_9107 |= A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD; + } + + cmd->state.dirty |= TU_CMD_DIRTY_RASTERIZER_DISCARD; +} + static void tu_flush_for_access(struct tu_cache_state *cache, enum 
tu_cmd_access_mask src_mask, @@ -3665,6 +3683,12 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, cmd->state.depth_plane_state = tu6_build_depth_plane_z_mode(cmd); } + if (cmd->state.dirty & TU_CMD_DIRTY_RASTERIZER_DISCARD) { + struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RASTERIZER_DISCARD, 4); + tu_cs_emit_regs(&cs, A6XX_PC_RASTER_CNTL(.dword = cmd->state.pc_raster_cntl)); + tu_cs_emit_regs(&cs, A6XX_VPC_UNKNOWN_9107(.dword = cmd->state.vpc_unknown_9107)); + } + if (cmd->state.dirty & TU_CMD_DIRTY_GRAS_SU_CNTL) { struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2); tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.gras_su_cntl)); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 643a71fd79c..59e806e7baf 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -2170,6 +2170,7 @@ tu_pipeline_allocate_cs(struct tu_device *dev, static void tu_pipeline_shader_key_init(struct ir3_shader_key *key, + const struct tu_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pipeline_info) { for (uint32_t i = 0; i < pipeline_info->stageCount; i++) { @@ -2179,7 +2180,8 @@ tu_pipeline_shader_key_init(struct ir3_shader_key *key, } } - if (pipeline_info->pRasterizationState->rasterizerDiscardEnable) + if (pipeline_info->pRasterizationState->rasterizerDiscardEnable && + !(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD))) return; const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState; @@ -2271,7 +2273,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, } struct ir3_shader_key key = {}; - tu_pipeline_shader_key_init(&key, builder->create_info); + tu_pipeline_shader_key_init(&key, pipeline, builder->create_info); nir_shader *nir[ARRAY_SIZE(builder->shaders)] = { NULL }; @@ -2439,6 +2441,8 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, 
pipeline->gras_su_cntl_mask = ~0u; pipeline->rb_depth_cntl_mask = ~0u; pipeline->rb_stencil_cntl_mask = ~0u; + pipeline->pc_raster_cntl_mask = ~0u; + pipeline->vpc_unknown_9107_mask = ~0u; if (!dynamic_info) return; @@ -2517,6 +2521,11 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT: pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE); break; + case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT: + pipeline->pc_raster_cntl_mask &= ~A6XX_PC_RASTER_CNTL_DISCARD; + pipeline->vpc_unknown_9107_mask &= ~A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD; + pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD); + break; default: assert(!"unsupported dynamic state"); break; @@ -2698,7 +2707,7 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, depth_clip_disable = !depth_clip_state->depthClipEnable; struct tu_cs cs; - uint32_t cs_size = 13 + (builder->emit_msaa_state ? 11 : 0); + uint32_t cs_size = 9 + (builder->emit_msaa_state ? 11 : 0); pipeline->rast_state = tu_cs_draw_state(&pipeline->cs, &cs, cs_size); tu_cs_emit_regs(&cs, @@ -2721,22 +2730,29 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f), A6XX_GRAS_SU_POINT_SIZE(1.0f)); - const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = - vk_find_struct_const(rast_info->pNext, - PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); - unsigned stream = stream_info ? stream_info->rasterizationStream : 0; - tu_cs_emit_regs(&cs, - A6XX_PC_RASTER_CNTL(.stream = stream, - .discard = rast_info->rasterizerDiscardEnable)); - tu_cs_emit_regs(&cs, - A6XX_VPC_UNKNOWN_9107(.raster_discard = rast_info->rasterizerDiscardEnable)); - /* If samples count couldn't be devised from the subpass, we should emit it here. * It happens when subpass doesn't use any color/depth attachment. 
*/ if (builder->emit_msaa_state) tu6_emit_msaa(&cs, builder->samples); + const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = + vk_find_struct_const(rast_info->pNext, + PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); + unsigned stream = stream_info ? stream_info->rasterizationStream : 0; + + pipeline->pc_raster_cntl = A6XX_PC_RASTER_CNTL_STREAM(stream); + pipeline->vpc_unknown_9107 = 0; + if (rast_info->rasterizerDiscardEnable) { + pipeline->pc_raster_cntl |= A6XX_PC_RASTER_CNTL_DISCARD; + pipeline->vpc_unknown_9107 |= A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD; + } + + if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RASTERIZER_DISCARD, 4)) { + tu_cs_emit_regs(&cs, A6XX_PC_RASTER_CNTL(.dword = pipeline->pc_raster_cntl)); + tu_cs_emit_regs(&cs, A6XX_VPC_UNKNOWN_9107(.dword = pipeline->vpc_unknown_9107)); + } + pipeline->gras_su_cntl = tu6_gras_su_cntl(rast_info, builder->samples, builder->multiview_mask != 0); @@ -3077,6 +3093,17 @@ tu_pipeline_builder_init_graphics( .layout = layout, }; + bool rasterizer_discard_dynamic = false; + if (create_info->pDynamicState) { + for (uint32_t i = 0; i < create_info->pDynamicState->dynamicStateCount; i++) { + if (create_info->pDynamicState->pDynamicStates[i] == + VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT) { + rasterizer_discard_dynamic = true; + break; + } + } + } + const struct tu_render_pass *pass = tu_render_pass_from_handle(create_info->renderPass); const struct tu_subpass *subpass = @@ -3085,7 +3112,8 @@ tu_pipeline_builder_init_graphics( builder->multiview_mask = subpass->multiview_mask; builder->rasterizer_discard = - create_info->pRasterizationState->rasterizerDiscardEnable; + builder->create_info->pRasterizationState->rasterizerDiscardEnable && + !rasterizer_discard_dynamic; /* variableMultisampleRate support */ builder->emit_msaa_state = (subpass->samples == 0) && !builder->rasterizer_discard; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 
e76bf10736b..eaadf028da6 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -502,6 +502,7 @@ enum tu_dynamic_state TU_DYNAMIC_STATE_RB_DEPTH_CNTL, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, TU_DYNAMIC_STATE_VB_STRIDE, + TU_DYNAMIC_STATE_RASTERIZER_DISCARD, TU_DYNAMIC_STATE_COUNT, /* no associated draw state: */ TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT, @@ -725,8 +726,9 @@ enum tu_cmd_dirty_bits TU_CMD_DIRTY_SHADER_CONSTS = BIT(7), TU_CMD_DIRTY_LRZ = BIT(8), TU_CMD_DIRTY_VS_PARAMS = BIT(9), + TU_CMD_DIRTY_RASTERIZER_DISCARD = BIT(10), /* all draw states were disabled and need to be re-enabled: */ - TU_CMD_DIRTY_DRAW_STATE = BIT(10) + TU_CMD_DIRTY_DRAW_STATE = BIT(11) }; /* There are only three cache domains we have to care about: the CCU, or @@ -940,6 +942,7 @@ struct tu_cmd_state uint32_t dynamic_stencil_ref; uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl; + uint32_t pc_raster_cntl, vpc_unknown_9107; enum pc_di_primtype primtype; bool primitive_restart_enable; @@ -1164,6 +1167,8 @@ struct tu_pipeline uint32_t gras_su_cntl, gras_su_cntl_mask; uint32_t rb_depth_cntl, rb_depth_cntl_mask; uint32_t rb_stencil_cntl, rb_stencil_cntl_mask; + uint32_t pc_raster_cntl, pc_raster_cntl_mask; + uint32_t vpc_unknown_9107, vpc_unknown_9107_mask; uint32_t stencil_wrmask; bool rb_depth_cntl_disable;