diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index cf6652d918f..0cb4a127c0b 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -99,6 +99,12 @@ const struct radv_dynamic_state default_dynamic_state = {
 	.cull_mode = 0u,
 	.front_face = 0u,
 	.primitive_topology = 0u,
+	.fragment_shading_rate = {
+		.size = (VkExtent2D) { 1u, 1u },
+		.combiner_ops = { VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
+				  VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR
+		},
+	},
 };
 
 static void
@@ -296,6 +302,15 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
 		}
 	}
 
+	if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
+		if (memcmp(&dest->fragment_shading_rate,
+			   &src->fragment_shading_rate,
+			   sizeof(src->fragment_shading_rate))) {
+			dest->fragment_shading_rate = src->fragment_shading_rate;
+			dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
+		}
+	}
+
 	cmd_buffer->state.dirty |= dest_mask;
 }
 
@@ -1566,6 +1581,28 @@ radv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer)
 			       S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op)));
 }
 
+static void
+radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+	struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+	uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1;
+	uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1;
+	uint32_t pa_cl_vrs_cntl = pipeline->graphics.vrs.pa_cl_vrs_cntl;
+
+	/* Emit per-draw VRS rate which is the first combiner. */
+	radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE,
+			       S_03098C_RATE_X(rate_x) |
+			       S_03098C_RATE_Y(rate_y));
+
+	/* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the
+	 * draw rate and the vertex rate.
+	 */
+	pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(d->fragment_shading_rate.combiner_ops[0]);
+
+	radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl);
+}
+
 static void
 radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
 			 int index,
@@ -2557,6 +2594,9 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
 	if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
 		radv_emit_stencil_control(cmd_buffer);
 
+	if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE)
+		radv_emit_fragment_shading_rate(cmd_buffer);
+
 	cmd_buffer->state.dirty &= ~states;
 }
 
@@ -4736,6 +4776,24 @@ void radv_CmdSetStencilOpEXT(
 	state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
 }
 
+/* Store the fragment size and both combiner ops in the command buffer's
+ * dynamic state and mark the fragment shading rate state as dirty.
+ */
+void radv_CmdSetFragmentShadingRateKHR(
+	VkCommandBuffer                             commandBuffer,
+	const VkExtent2D*                           pFragmentSize,
+	const VkFragmentShadingRateCombinerOpKHR    combinerOps[2])
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_cmd_state *state = &cmd_buffer->state;
+
+	state->dynamic.fragment_shading_rate.size = *pFragmentSize;
+	for (unsigned i = 0; i < 2; i++)
+		state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i];
+
+	state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
+}
+
 void radv_CmdExecuteCommands(
 	VkCommandBuffer                             commandBuffer,
 	uint32_t                                    commandBufferCount,
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 4e79e04e987..d698e858ce7 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -8133,6 +8133,47 @@ void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
 	}
 }
 
+/* Report the supported fragment shading rates: 2x2, 2x1, 1x2 and 1x1, in
+ * that order (largest width first, then largest height first).
+ */
+VkResult radv_GetPhysicalDeviceFragmentShadingRatesKHR(
+	VkPhysicalDevice                            physicalDevice,
+	uint32_t*                                   pFragmentShadingRateCount,
+	VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
+{
+	VK_OUTARRAY_MAKE(out, pFragmentShadingRates, pFragmentShadingRateCount);
+
+#define append_rate(w, h, s) { \
+	VkPhysicalDeviceFragmentShadingRateKHR rate = { \
+		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR,\
+		.sampleCounts = (s), \
+		.fragmentSize = { .width = (w), .height = (h) }, \
+	}; \
+	vk_outarray_append(&out, r) *r = rate; \
+}
+
+	for (uint32_t x = 2; x >= 1; x--) {
+		for (uint32_t y = 2; y >= 1; y--) {
+			VkSampleCountFlags samples;
+
+			if (x == 1 && y == 1) {
+				/* The 1x1 rate must report all sample counts (~0). */
+				samples = ~0u;
+			} else {
+				samples = VK_SAMPLE_COUNT_1_BIT |
+					  VK_SAMPLE_COUNT_2_BIT |
+					  VK_SAMPLE_COUNT_4_BIT |
+					  VK_SAMPLE_COUNT_8_BIT;
+			}
+
+			append_rate(x, y, samples);
+		}
+	}
+#undef append_rate
+
+	return vk_outarray_status(&out);
+}
+
 VkResult radv_CreatePrivateDataSlotEXT(
 	VkDevice                                    _device,
 	const VkPrivateDataSlotCreateInfoEXT*       pCreateInfo,
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index b2fc39b9638..cb61d4aa383 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -85,6 +85,10 @@ radv_meta_save(struct radv_meta_saved_state *state,
 		state->stencil_op.back.fail_op = cmd_buffer->state.dynamic.stencil_op.back.fail_op;
 		state->stencil_op.back.pass_op = cmd_buffer->state.dynamic.stencil_op.back.pass_op;
 		state->stencil_op.back.depth_fail_op = cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op;
+
+		state->fragment_shading_rate.size = cmd_buffer->state.dynamic.fragment_shading_rate.size;
+		state->fragment_shading_rate.combiner_ops[0] = cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0];
+		state->fragment_shading_rate.combiner_ops[1] = cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1];
 	}
 
 	if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
@@ -167,6 +171,10 @@ radv_meta_restore(const struct radv_meta_saved_state *state,
 		cmd_buffer->state.dynamic.stencil_op.back.pass_op = state->stencil_op.back.pass_op;
 		cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op = state->stencil_op.back.depth_fail_op;
 
+		cmd_buffer->state.dynamic.fragment_shading_rate.size = state->fragment_shading_rate.size;
+		cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0] = state->fragment_shading_rate.combiner_ops[0];
+		cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1] = state->fragment_shading_rate.combiner_ops[1];
+
 		cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT |
 					   RADV_CMD_DIRTY_DYNAMIC_SCISSOR |
 					   RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
@@ -177,7 +185,8 @@ radv_meta_restore(const struct radv_meta_saved_state *state,
 					   RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
 					   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
 					   RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
-					   RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
+					   RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP |
+					   RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
 	}
 
 	if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index 7ec6899a127..8f22ca9743a 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -85,6 +85,11 @@ struct radv_meta_saved_state {
 			VkCompareOp compare_op;
 		} back;
 	} stencil_op;
+
+	struct {
+		VkExtent2D size;
+		VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
+	} fragment_shading_rate;
 };
 
 VkResult radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 4b639f0446e..d48c2782d57 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1226,6 +1226,46 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
 	ms->pa_sc_aa_mask[1] = mask | (mask << 16);
 }
 
+static void
+gfx103_pipeline_init_vrs_state(struct radv_pipeline *pipeline,
+			       const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	const VkPipelineMultisampleStateCreateInfo *vkms = radv_pipeline_get_multisample_state(pCreateInfo);
+	struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+	struct radv_multisample_state *ms = &pipeline->graphics.ms;
+	struct radv_vrs_state *vrs = &pipeline->graphics.vrs;
+
+	if (vkms &&
+	    (vkms->sampleShadingEnable ||
+	     ps->info.ps.uses_sample_shading || ps->info.ps.reads_sample_mask_in)) {
+		/* Disable VRS and use the rates from PS_ITER_SAMPLES if:
+		 *
+		 * 1) sample shading is enabled or per-sample interpolation is
+		 *    used by the fragment shader
+		 * 2) the fragment shader reads gl_SampleMaskIn because the
+		 *    16-bit sample coverage mask isn't enough for MSAA8x and
+		 *    2x2 coarse shading isn't enough.
+		 */
+		vrs->pa_cl_vrs_cntl =
+			S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE);
+
+		/* Make sure sample shading is enabled even if only MSAA1x is
+		 * used because the SAMPLE_ITER combiner is in passthrough
+		 * mode if PS_ITER_SAMPLE is 0, and it uses the per-draw rate.
+		 * The default VRS rate when sample shading is enabled is 1x1.
+		 */
+		if (!G_028A4C_PS_ITER_SAMPLE(ms->pa_sc_mode_cntl_1))
+			ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1);
+	} else {
+		vrs->pa_cl_vrs_cntl =
+			S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
+	}
+
+	/* Primitive and HTILE combiners are always passthrough. */
+	vrs->pa_cl_vrs_cntl |= S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) |
+			       S_028848_HTILE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
+}
+
 static bool
 radv_prim_can_use_guardband(enum VkPrimitiveTopology topology)
 {
@@ -1344,6 +1384,8 @@ static unsigned radv_dynamic_state_mask(VkDynamicState state)
 		return RADV_DYNAMIC_STENCIL_OP;
 	case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
 		return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
+	case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
+		return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
 	default:
 		unreachable("Unhandled dynamic state");
 	}
@@ -1389,6 +1431,10 @@ static uint32_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreat
 				  PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT))
 		states &= ~RADV_DYNAMIC_LINE_STIPPLE;
 
+	if (!vk_find_struct_const(pCreateInfo->pNext,
+				  PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR))
+		states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
+
 	/* TODO: blend constants & line width. */
 
 	return states;
@@ -1725,6 +1771,14 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 	if (!(states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE))
 		pipeline->graphics.uses_dynamic_stride = true;
 
+	const VkPipelineFragmentShadingRateStateCreateInfoKHR *shading_rate =
+		vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
+	if (needed_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
+		dynamic->fragment_shading_rate.size = shading_rate->fragmentSize;
+		for (int i = 0; i < 2; i++)
+			dynamic->fragment_shading_rate.combiner_ops[i] = shading_rate->combinerOps[i];
+	}
+
 	pipeline->dynamic_state.mask = states;
 }
 
@@ -4228,7 +4282,8 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
 	total_mask = clip_dist_mask | cull_dist_mask;
 	bool misc_vec_ena = outinfo->writes_pointsize ||
 		outinfo->writes_layer ||
-		outinfo->writes_viewport_index;
+		outinfo->writes_viewport_index ||
+		outinfo->writes_primitive_shading_rate;
 	unsigned spi_vs_out_config, nparams;
 
 	/* VS is required to export at least one param. */
@@ -4257,12 +4312,11 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
 	                       S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
 	                       S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
 	                       S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+	                       S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
 	                       S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
 	                       S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
 	                       S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
 	                       S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
-	                       S_02881C_BYPASS_PRIM_RATE_COMBINER(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
-	                       S_02881C_BYPASS_VTX_RATE_COMBINER(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
 	                       cull_dist_mask << 8 |
 	                       clip_dist_mask);
 
@@ -4335,7 +4389,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
 	total_mask = clip_dist_mask | cull_dist_mask;
 	bool misc_vec_ena = outinfo->writes_pointsize ||
 		outinfo->writes_layer ||
-		outinfo->writes_viewport_index;
+		outinfo->writes_viewport_index ||
+		outinfo->writes_primitive_shading_rate;
 	bool es_enable_prim_id = outinfo->export_prim_id ||
 				 (es && es->info.uses_prim_id);
 	bool break_wave_at_eoi = false;
@@ -4373,12 +4428,11 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
 	                       S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
 	                       S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
 	                       S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+	                       S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
 	                       S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
 	                       S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
 	                       S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
 	                       S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
-	                       S_02881C_BYPASS_PRIM_RATE_COMBINER(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
-	                       S_02881C_BYPASS_VTX_RATE_COMBINER(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) |
 	                       cull_dist_mask << 8 |
 	                       clip_dist_mask);
 
@@ -5086,6 +5140,23 @@ radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
 	radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
 }
 
+/* Set EN_VRS_RATE in VGT_DRAW_PAYLOAD_CNTL when the pipeline chains a fragment
+ * shading rate create-info struct or declares the rate as dynamic state.
+ */
+static void
+gfx103_pipeline_generate_vrs_state(struct radeon_cmdbuf *ctx_cs,
+				   const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+	bool enable_vrs = false;
+
+	if (vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) ||
+	    radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR))
+		enable_vrs = true;
+
+	radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL,
+			       S_028A98_EN_VRS_RATE(enable_vrs));
+}
+
 static void
 radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
@@ -5123,6 +5194,9 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
 	if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && !radv_pipeline_has_ngg(pipeline))
 		gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline);
 
+	if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+		gfx103_pipeline_generate_vrs_state(ctx_cs, pCreateInfo);
+
 	pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4);
 
 	assert(ctx_cs->cdw <= ctx_cs->max_dw);
@@ -5234,6 +5308,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	radv_pipeline_init_raster_state(pipeline, pCreateInfo);
 	radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
 
+	if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+		gfx103_pipeline_init_vrs_state(pipeline, pCreateInfo);
+
 	/* Ensure that some export memory is always allocated, for two reasons:
 	 *
 	 * 1) Correctness: The hardware ignores the EXEC mask if no export
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 7b88ae59589..c980727d9f0 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1011,7 +1011,8 @@ enum radv_dynamic_state_bits {
 	RADV_DYNAMIC_STENCIL_TEST_ENABLE		= 1 << 19,
 	RADV_DYNAMIC_STENCIL_OP				= 1 << 20,
 	RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE        = 1 << 21,
-	RADV_DYNAMIC_ALL				= (1 << 22) - 1,
+	RADV_DYNAMIC_FRAGMENT_SHADING_RATE		= 1 << 22,
+	RADV_DYNAMIC_ALL				= (1 << 23) - 1,
 };
 
 enum radv_cmd_dirty_bits {
@@ -1039,12 +1040,13 @@ enum radv_cmd_dirty_bits {
 	RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE       = 1 << 19,
 	RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP                = 1 << 20,
 	RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 21,
-	RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 22) - 1,
-	RADV_CMD_DIRTY_PIPELINE                          = 1 << 22,
-	RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 23,
-	RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 24,
-	RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 25,
-	RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 26,
+	RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE     = 1 << 22,
+	RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 23) - 1,
+	RADV_CMD_DIRTY_PIPELINE                          = 1 << 23,
+	RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 24,
+	RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 25,
+	RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 26,
+	RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 27,
 };
 
 enum radv_cmd_flush_bits {
@@ -1209,6 +1211,11 @@ struct radv_dynamic_state {
 	VkCompareOp depth_compare_op;
 	bool depth_bounds_test_enable;
 	bool stencil_test_enable;
+
+	struct {
+		VkExtent2D size;
+		VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
+	} fragment_shading_rate;
 };
 
 extern const struct radv_dynamic_state default_dynamic_state;
@@ -1696,6 +1703,10 @@ struct radv_multisample_state {
 	unsigned num_samples;
 };
 
+struct radv_vrs_state {
+	uint32_t pa_cl_vrs_cntl;
+};
+
 struct radv_prim_vertex_count {
 	uint8_t min;
 	uint8_t incr;
@@ -1740,6 +1751,7 @@ struct radv_pipeline {
 		struct {
 			struct radv_multisample_state ms;
 			struct radv_binning_state binning;
+			struct radv_vrs_state vrs;
 			uint32_t spi_baryc_cntl;
 			bool prim_restart_enable;
 			unsigned esgs_ring_size;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 4705a474bdb..cab4b1b5941 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -459,6 +459,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 				.variable_pointers = true,
 				.vk_memory_model = true,
 				.vk_memory_model_device_scope = true,
+				.fragment_shading_rate = device->physical_device->rad_info.chip_class >= GFX10_3,
 			},
 			.ubo_addr_format = nir_address_format_32bit_index_offset,
 			.ssbo_addr_format = nir_address_format_32bit_index_offset,
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 833fa84cfe0..24778f5adf8 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -212,6 +212,7 @@ struct radv_vs_output_info {
 	bool writes_pointsize;
 	bool writes_layer;
 	bool writes_viewport_index;
+	bool writes_primitive_shading_rate;
 	bool export_prim_id;
 	unsigned pos_exports;
 };
@@ -323,6 +324,7 @@ struct radv_shader_info {
 		bool can_discard;
 		bool early_fragment_test;
 		bool post_depth_coverage;
+		bool reads_sample_mask_in;
 		uint8_t depth_layout;
 	} ps;
 	struct {
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 8147b08b845..24c6ed015bc 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -162,6 +162,9 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
 	case nir_intrinsic_load_num_subgroups:
 		info->cs.uses_local_invocation_idx = true;
 		break;
+	case nir_intrinsic_load_sample_mask_in:
+		info->ps.reads_sample_mask_in = true;
+		break;
 	case nir_intrinsic_load_view_index:
 		info->needs_multiview_view_index = true;
 		if (nir->info.stage == MESA_SHADER_FRAGMENT)
@@ -487,6 +490,9 @@ gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
 		case VARYING_SLOT_LAYER:
 			vs_info->writes_layer = true;
 			break;
+		case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
+			vs_info->writes_primitive_shading_rate = true;
+			break;
 		default:
 			break;
 		}
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 0d83ca2d8cd..0030338f0ef 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -491,6 +491,15 @@ si_emit_graphics(struct radv_device *device,
 		/* This allows sample shading. */
 		radeon_set_context_reg(cs, R_028848_PA_CL_VRS_CNTL,
 				       S_028848_SAMPLE_ITER_COMBINER_MODE(1));
+
+		/* This is the main VRS register and also the last
+		 * combiner, set it to passthrough mode because other
+		 * combiners are configured with PA_CL_VRS_CNTL.
+		 */
+		radeon_set_context_reg(cs, R_028064_DB_VRS_OVERRIDE_CNTL,
+				       S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(V_028064_VRS_COMB_MODE_PASSTHRU) |
+				       S_028064_VRS_OVERRIDE_RATE_X(0) |
+				       S_028064_VRS_OVERRIDE_RATE_Y(0));
 	}
 
 	if (physical_device->rad_info.chip_class == GFX10) {