diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 781b0fa1b97..6a14aee0bc9 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -129,6 +129,7 @@ const struct radv_dynamic_state default_dynamic_state = { .alpha_to_coverage_enable = 0u, .sample_mask = 0u, .depth_clip_enable = 0u, + .conservative_rast_mode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, }; static void @@ -276,6 +277,8 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy RADV_CMP_COPY(depth_clip_enable, RADV_DYNAMIC_DEPTH_CLIP_ENABLE); + RADV_CMP_COPY(conservative_rast_mode, RADV_DYNAMIC_CONSERVATIVE_RAST_MODE); + #undef RADV_CMP_COPY cmd_buffer->state.dirty |= dest_mask; @@ -1510,6 +1513,11 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.emitted_graphics_pipeline->ms.pa_sc_mode_cntl_0 != pipeline->ms.pa_sc_mode_cntl_0) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE_ENABLE; + if (!cmd_buffer->state.emitted_graphics_pipeline || + cmd_buffer->state.emitted_graphics_pipeline->ms.pa_sc_aa_config != pipeline->ms.pa_sc_aa_config || + cmd_buffer->state.emitted_graphics_pipeline->ms.db_eqaa != pipeline->ms.db_eqaa) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE; /* radv_emit_conservative_rast_mode() builds PA_SC_AA_CONFIG and DB_EQAA from these pipeline fields, so flag a re-emit when they differ or nothing was emitted yet */ + radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw); if (pipeline->has_ngg_culling && @@ -1985,6 +1993,49 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.tess_num_patches); } +static void +radv_emit_conservative_rast_mode(struct radv_cmd_buffer *cmd_buffer) /* Emits PA_SC_CONSERVATIVE_RASTERIZATION_CNTL (only when gfx_level >= GFX9), PA_SC_AA_CONFIG and DB_EQAA for the current dynamic conservative rasterization mode. */ +{ + const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; + struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + unsigned pa_sc_aa_config = pipeline->ms.pa_sc_aa_config; + unsigned db_eqaa = pipeline->ms.db_eqaa; /* both start from the bound pipeline's MSAA values and get extra bits below when the mode is not DISABLED */ + + if 
(pdevice->rad_info.gfx_level >= GFX9) { /* the conservative rasterization control register is only written for gfx_level >= GFX9 */ + uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); /* value kept when the mode is DISABLED */ + + if (d->conservative_rast_mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { + pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | + S_028C4C_CENTROID_SAMPLE_OVERRIDE(1); + + if (d->conservative_rast_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) { + pa_sc_conservative_rast |= + S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) | + S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) | + S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1); + } else { + assert(d->conservative_rast_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT); + pa_sc_conservative_rast |= + S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | + S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) | + S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0); + } + + /* Adjust MSAA state if conservative rasterization is enabled. 
*/ + pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1); + db_eqaa |= S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) | + S_028804_OVERRASTERIZATION_AMOUNT(4); + } + + radeon_set_context_reg(cmd_buffer->cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, + pa_sc_conservative_rast); + } + + radeon_set_context_reg(cmd_buffer->cs, R_028BE0_PA_SC_AA_CONFIG, pa_sc_aa_config); + radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, db_eqaa); /* these two registers are written regardless of gfx_level */ +} + static void radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct radv_color_buffer_info *cb, struct radv_image_view *iview, @@ -3435,6 +3486,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool pip if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE) radv_emit_discard_rectangle(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE) + radv_emit_conservative_rast_mode(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) radv_emit_sample_locations(cmd_buffer); @@ -5963,6 +6017,18 @@ radv_CmdSetDepthClipEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthClipE state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE; } +VKAPI_ATTR void VKAPI_CALL +radv_CmdSetConservativeRasterizationModeEXT(VkCommandBuffer commandBuffer, + VkConservativeRasterizationModeEXT conservativeRasterizationMode) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + state->dynamic.conservative_rast_mode = conservativeRasterizationMode; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CONSERVATIVE_RAST_MODE; /* only records the mode; radv_cmd_buffer_flush_dynamic_state() calls radv_emit_conservative_rast_mode() when this bit is set */ +} + VKAPI_ATTR void VKAPI_CALL radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers) @@ -7353,7 +7419,9 @@ radv_get_ngg_culling_settings(struct radv_cmd_buffer *cmd_buffer, bool vp_y_inve /* Small primitive culling is only valid when conservative overestimation is not used. 
It's also * disabled for user sample locations because small primitive culling assumes a sample * position at (0.5, 0.5). */ - if (!pipeline->uses_conservative_overestimate && !pipeline->uses_user_sample_locations) { + bool uses_conservative_overestimate = + d->conservative_rast_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT; /* read from the dynamic state; replaces the removed pipeline->uses_conservative_overestimate flag */ + if (!uses_conservative_overestimate && !pipeline->uses_user_sample_locations) { nggc_settings |= radv_nggc_small_primitives; /* small_prim_precision = num_samples / 2^subpixel_bits diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 4410af3f799..4e4d52c8be1 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1078,7 +1078,6 @@ radv_pipeline_init_multisample_state(struct radv_graphics_pipeline *pipeline, const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; struct radv_multisample_state *ms = &pipeline->ms; unsigned num_tile_pipes = pdevice->rad_info.num_tile_pipes; - const VkConservativeRasterizationModeEXT mode = state->rs->conservative_mode; bool out_of_order_rast = false; int ps_iter_samples = 1; @@ -1122,14 +1121,6 @@ radv_pipeline_init_multisample_state(struct radv_graphics_pipeline *pipeline, ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) | S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1); - /* Adjust MSAA state if conservative rasterization is enabled. */ - if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { - ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1); - - ms->db_eqaa |= - S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) | S_028804_OVERRASTERIZATION_AMOUNT(4); - } - ms->pa_sc_mode_cntl_1 = S_028A4C_WALK_FENCE_ENABLE(1) | // TODO linear dst fixes S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 
2 : 3) | @@ -1898,6 +1889,10 @@ radv_pipeline_init_dynamic_state(struct radv_graphics_pipeline *pipeline, dynamic->depth_clip_enable = state->rs->depth_clip_enable == VK_MESA_DEPTH_CLIP_ENABLE_TRUE; } + if (states & RADV_DYNAMIC_CONSERVATIVE_RAST_MODE) { + dynamic->conservative_rast_mode = state->rs->conservative_mode; + } + pipeline->dynamic_state.mask = states; } @@ -1914,9 +1909,6 @@ radv_pipeline_init_raster_state(struct radv_graphics_pipeline *pipeline, S_028810_DX_CLIP_SPACE_DEF(!pipeline->negative_one_to_one) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); - pipeline->uses_conservative_overestimate = - state->rs->conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT; - pipeline->depth_clamp_mode = RADV_DEPTH_CLAMP_MODE_VIEWPORT; if (!state->rs->depth_clamp_enable) { /* For optimal performance, depth clamping should always be enabled except if the @@ -4797,40 +4789,6 @@ radv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs, radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); } -static void -radv_pipeline_emit_raster_state(struct radeon_cmdbuf *ctx_cs, - const struct radv_graphics_pipeline *pipeline, - const struct vk_graphics_pipeline_state *state) -{ - const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; - const VkConservativeRasterizationModeEXT mode = state->rs->conservative_mode; - uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); - - if (pdevice->rad_info.gfx_level >= GFX9) { - /* Conservative rasterization. 
*/ - if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { - pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | - S_028C4C_CENTROID_SAMPLE_OVERRIDE(1); - - if (mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) { - pa_sc_conservative_rast |= - S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) | - S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) | - S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1); - } else { - assert(mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT); - pa_sc_conservative_rast |= - S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | - S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) | - S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0); - } - } - - radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, - pa_sc_conservative_rast); - } -} - static void radv_pipeline_emit_multisample_state(struct radeon_cmdbuf *ctx_cs, const struct radv_graphics_pipeline *pipeline) @@ -4838,9 +4796,6 @@ radv_pipeline_emit_multisample_state(struct radeon_cmdbuf *ctx_cs, const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; const struct radv_multisample_state *ms = &pipeline->ms; - radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa); - radeon_set_context_reg(ctx_cs, R_028BE0_PA_SC_AA_CONFIG, ms->pa_sc_aa_config); - radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); /* The exclusion bits can be set to improve rasterization efficiency @@ -5817,7 +5772,6 @@ radv_pipeline_emit_pm4(struct radv_graphics_pipeline *pipeline, radv_pipeline_emit_depth_stencil_state(ctx_cs, ds_state); radv_pipeline_emit_blend_state(ctx_cs, pipeline, blend); - radv_pipeline_emit_raster_state(ctx_cs, pipeline, state); radv_pipeline_emit_multisample_state(ctx_cs, pipeline); radv_pipeline_emit_vgt_gs_mode(ctx_cs, pipeline); radv_pipeline_emit_vertex_shader(ctx_cs, cs, 
pipeline); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index c09c9c5e95c..165107704ca 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1365,6 +1365,8 @@ struct radv_dynamic_state { uint16_t sample_mask; bool depth_clip_enable; + + VkConservativeRasterizationModeEXT conservative_rast_mode; }; extern const struct radv_dynamic_state default_dynamic_state; @@ -2069,7 +2071,6 @@ struct radv_graphics_pipeline { bool disable_out_of_order_rast_for_occlusion; bool uses_drawid; bool uses_baseinstance; - bool uses_conservative_overestimate; bool negative_one_to_one; enum radv_depth_clamp_mode depth_clamp_mode; bool use_per_attribute_vb_descs;