From 1ad295ed6f6ee7e33d23efa2392a8018605ffca3 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 19 Jan 2021 16:12:01 +0100 Subject: [PATCH] radv: allow to force VRS rates on GFX10.3 with RADV_FORCE_VRS This allows to force the VRS rates via RADV_FORCE_VRS, the supported values are 2x2, 1x2 and 2x1. This supports the primitive shading rate mode for non GUI elements. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- docs/envvars.rst | 6 ++++ .../compiler/aco_instruction_selection.cpp | 30 +++++++++++++++- .../aco_instruction_selection_setup.cpp | 4 ++- src/amd/vulkan/radv_device.c | 21 +++++++++++- src/amd/vulkan/radv_pipeline.c | 34 ++++++++++++++++--- src/amd/vulkan/radv_private.h | 14 ++++++++ src/amd/vulkan/radv_shader.c | 14 ++++++++ src/amd/vulkan/radv_shader.h | 1 + 8 files changed, 117 insertions(+), 7 deletions(-) diff --git a/docs/envvars.rst b/docs/envvars.rst index 8ffc53974d9..5cebbb37754 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -622,6 +622,12 @@ RADV driver environment variables ``RADV_FORCE_FAMILY`` create a null device to compile shaders without a AMD GPU (e.g. vega10) + +``RADV_FORCE_VRS`` + allow to force per-pipeline vertex VRS rates on GFX10.3+. This is only + forced for pipelines that don't explicitly use VRS or flat shading. + The supported values are 2x2, 1x2 and 2x1. Only for testing purposes. 
+ ``RADV_PERFTEST`` a comma-separated list of named flags, which do various things: diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index e921c70da86..59c76d88064 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -10289,6 +10289,31 @@ static void export_vs_psiz_layer_viewport_vrs(isel_context *ctx, int *next_pos) exp->operands[1] = Operand(out); exp->enabled_mask |= 0x2; + } else if (ctx->options->force_vrs_rates) { + /* Bits [2:3] = VRS rate X + * Bits [4:5] = VRS rate Y + * + * The range is [-2, 1]. Values: + * 1: 2x coarser shading rate in that direction. + * 0: normal shading rate + * -1: 2x finer shading rate (sample shading, not directional) + * -2: 4x finer shading rate (sample shading, not directional) + * + * Sample shading can't go above 8 samples, so both numbers can't be -2 + * at the same time. + */ + Builder bld(ctx->program, ctx->block); + Temp rates = bld.copy(bld.def(v1), Operand((unsigned)ctx->options->force_vrs_rates)); + + /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. 
*/ + Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), + Operand(0x3f800000u), + Operand(ctx->outputs.temps[VARYING_SLOT_POS + 3])); + rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), + bld.copy(bld.def(v1), Operand(0u)), rates, cond); + + exp->operands[1] = Operand(rates); + exp->enabled_mask |= 0x2; } exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0; @@ -10354,8 +10379,11 @@ static void create_vs_exports(isel_context *ctx) /* the order these position exports are created is important */ int next_pos = 0; export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos); + + bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate || + ctx->options->force_vrs_rates; if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index || - outinfo->writes_primitive_shading_rate) { + writes_primitive_shading_rate) { export_vs_psiz_layer_viewport_vrs(ctx, &next_pos); } if (ctx->num_clip_distances + ctx->num_cull_distances > 0) diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 95e91808f68..bbdbd800e7a 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -328,8 +328,10 @@ setup_vs_output_info(isel_context *ctx, nir_shader *nir, outinfo->param_exports = 0; int pos_written = 0x1; + bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate || + ctx->options->force_vrs_rates; if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer || - outinfo->writes_primitive_shading_rate) + writes_primitive_shading_rate) pos_written |= 1 << 1; uint64_t mask = nir->info.outputs_written; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index bc594aa2123..25af86c3950 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2861,7 +2861,8 @@ VkResult radv_CreateDevice( 
device->robust_buffer_access2 = robust_buffer_access2; device->adjust_frag_coord_z = (vrs_enabled || - device->vk.enabled_extensions.KHR_fragment_shading_rate) && + device->vk.enabled_extensions.KHR_fragment_shading_rate || + device->force_vrs != RADV_FORCE_VRS_NONE) && (device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID || device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER || device->physical_device->rad_info.family == CHIP_VANGOGH); @@ -3013,6 +3014,24 @@ VkResult radv_CreateDevice( goto fail; } + if (getenv("RADV_FORCE_VRS")) { + const char *vrs_rates = getenv("RADV_FORCE_VRS"); + + if (device->physical_device->rad_info.chip_class < GFX10_3) + fprintf(stderr, "radv: VRS is only supported on RDNA2+\n"); + else if (device->physical_device->use_llvm) + fprintf(stderr, "radv: Forcing VRS rates is only supported with ACO\n"); + else if (!strcmp(vrs_rates, "2x2")) + device->force_vrs = RADV_FORCE_VRS_2x2; + else if (!strcmp(vrs_rates, "2x1")) + device->force_vrs = RADV_FORCE_VRS_2x1; + else if (!strcmp(vrs_rates, "1x2")) + device->force_vrs = RADV_FORCE_VRS_1x2; + else + fprintf(stderr, "radv: Invalid VRS rates specified " + "(valid values are 2x2, 2x1 and 1x2)\n"); + } + device->keep_shader_info = keep_shader_info; result = radv_device_init_meta(device); if (result != VK_SUCCESS) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 748a5a1ee4c..b47dc5ae4fe 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -227,6 +227,12 @@ static uint32_t get_hash_flags(const struct radv_device *device, bool stats) hash_flags |= RADV_HASH_SHADER_INVARIANT_GEOM; if (stats) hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS; + if (device->force_vrs == RADV_FORCE_VRS_2x2) /* hash only the rate that is actually forced */ + hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x2; + if (device->force_vrs == RADV_FORCE_VRS_2x1) + hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x1; + if (device->force_vrs == RADV_FORCE_VRS_1x2) + hash_flags |= 
RADV_HASH_SHADER_FORCE_VRS_1x2; return hash_flags; } @@ -4438,10 +4444,13 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, clip_dist_mask = outinfo->clip_dist_mask; cull_dist_mask = outinfo->cull_dist_mask; total_mask = clip_dist_mask | cull_dist_mask; + + bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate || + pipeline->device->force_vrs != RADV_FORCE_VRS_NONE; bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index || - outinfo->writes_primitive_shading_rate; + writes_primitive_shading_rate; unsigned spi_vs_out_config, nparams; /* VS is required to export at least one param. */ @@ -4470,7 +4479,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | - S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) | + S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) | S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | @@ -4545,10 +4554,13 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, clip_dist_mask = outinfo->clip_dist_mask; cull_dist_mask = outinfo->cull_dist_mask; total_mask = clip_dist_mask | cull_dist_mask; + + bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate || + pipeline->device->force_vrs != RADV_FORCE_VRS_NONE; bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index || - outinfo->writes_primitive_shading_rate; + writes_primitive_shading_rate; bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id); bool break_wave_at_eoi = false; @@ -4586,7 +4598,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, 
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | - S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) | + S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) | S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | @@ -5332,6 +5344,20 @@ gfx103_pipeline_generate_vrs_state(struct radeon_cmdbuf *ctx_cs, */ mode = V_028064_VRS_COMB_MODE_OVERRIDE; rate_x = rate_y = 1; + } else if (pipeline->device->force_vrs != RADV_FORCE_VRS_NONE) { + /* Force enable vertex VRS if requested by the user. */ + radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL, + S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) | + S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); + + /* If the shader is using discard, turn off coarse shading + * because discard at 2x2 pixel granularity degrades quality + * too much. MIN allows sample shading but not coarse shading. + */ + struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; + + mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN + : V_028064_VRS_COMB_MODE_PASSTHRU; } radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 6b9f6d9853c..02368fa1e07 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -731,6 +731,14 @@ struct radv_device_border_color_data { mtx_t mutex; }; +enum radv_force_vrs +{ + RADV_FORCE_VRS_NONE = 0, + RADV_FORCE_VRS_2x2, + RADV_FORCE_VRS_2x1, + RADV_FORCE_VRS_1x2, +}; + struct radv_device { struct vk_device vk; @@ -823,6 +831,9 @@ struct radv_device { /* Track the number of device loss occurs. */ int lost; + + /* Whether the user forced VRS rates on GFX10.3+. 
*/ + enum radv_force_vrs force_vrs; }; VkResult _radv_device_set_lost(struct radv_device *device, @@ -1643,6 +1654,9 @@ struct radv_event { #define RADV_HASH_SHADER_MRT_NAN_FIXUP (1 << 6) #define RADV_HASH_SHADER_INVARIANT_GEOM (1 << 7) #define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8) +#define RADV_HASH_SHADER_FORCE_VRS_2x2 (1 << 9) +#define RADV_HASH_SHADER_FORCE_VRS_2x1 (1 << 10) +#define RADV_HASH_SHADER_FORCE_VRS_1x2 (1 << 11) void radv_hash_shaders(unsigned char *hash, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 86607bca1e0..e23bf5f6ad9 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1420,6 +1420,20 @@ shader_variant_compile(struct radv_device *device, options->debug.func = radv_compiler_debug; options->debug.private_data = &debug_data; + switch (device->force_vrs) { /* X rate in bits [2:3], Y rate in bits [4:5]; 1 = 2x coarser */ + case RADV_FORCE_VRS_2x2: + options->force_vrs_rates = (1u << 2) | (1u << 4); + break; + case RADV_FORCE_VRS_2x1: + options->force_vrs_rates = (1u << 2) | (0u << 4); + break; + case RADV_FORCE_VRS_1x2: + options->force_vrs_rates = (0u << 2) | (1u << 4); + break; + default: + break; + } + struct radv_shader_args args = {0}; args.options = options; args.shader_info = info; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index d489cae20db..d4878cc14a1 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -142,6 +142,7 @@ struct radv_nir_compiler_options { const struct radeon_info *info; uint32_t tess_offchip_block_dw_size; uint32_t address32_hi; + uint8_t force_vrs_rates; struct { void (*func)(void *private_data,