radv: lower the gl_FragCoord.z adjustment for VRS in NIR

fossils-db (Sienna Cichlid):
Totals from 4432 (3.29% of 134913) affected shaders:
VGPRs: 231232 -> 231880 (+0.28%)
CodeSize: 24738224 -> 24718008 (-0.08%); split: -0.08%, +0.00%
MaxWaves: 93120 -> 93000 (-0.13%)
Instrs: 4540970 -> 4541062 (+0.00%); split: -0.01%, +0.01%
Latency: 49658353 -> 49641444 (-0.03%); split: -0.05%, +0.01%
InvThroughput: 9604328 -> 9603041 (-0.01%); split: -0.02%, +0.01%
VClause: 66497 -> 66498 (+0.00%)
SClause: 209530 -> 209532 (+0.00%); split: -0.01%, +0.01%
Copies: 276135 -> 276249 (+0.04%); split: -0.14%, +0.18%
PreSGPRs: 189409 -> 189415 (+0.00%)
PreVGPRs: 207368 -> 207458 (+0.04%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15450>
This commit is contained in:
Samuel Pitoiset
2022-03-18 10:15:41 +01:00
parent a42b6a4d39
commit 4cfb5332d6
7 changed files with 28 additions and 57 deletions

View File

@@ -4886,34 +4886,6 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), get_arg(ctx, ctx->args->ac.frag_pos[3]));
}
if (ctx->options->adjust_frag_coord_z &&
G_0286CC_POS_Z_FLOAT_ENA(ctx->program->config->spi_ps_input_ena)) {
/* Adjust gl_FragCoord.z for VRS due to a hw bug on some GFX10.3 chips. */
Operand frag_z = vec->operands[2];
Temp adjusted_frag_z = bld.tmp(v1);
Temp tmp;
/* dFdx fine */
Temp tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), frag_z, dpp_quad_perm(0, 0, 2, 2));
tmp = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), frag_z, tl, dpp_quad_perm(1, 1, 3, 3));
emit_wqm(bld, tmp, adjusted_frag_z, true);
/* adjusted_frag_z * 0.0625 + frag_z */
adjusted_frag_z = bld.vop3(aco_opcode::v_fma_f32, bld.def(v1), adjusted_frag_z,
Operand::c32(0x3d800000u /* 0.0625 */), frag_z);
/* VRS Rate X = Ancillary[2:3] */
Temp x_rate =
bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
Operand::c32(2u), Operand::c32(2u));
/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
Temp cond =
bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(x_rate));
vec->operands[2] =
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), frag_z, adjusted_frag_z, cond);
}
for (Operand& op : vec->operands)
op = op.isUndefined() ? Operand::zero() : op;

View File

@@ -3540,26 +3540,6 @@ emit_load_frag_coord(struct ac_nir_context *ctx)
ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
if (ctx->abi->adjust_frag_coord_z) {
/* Adjust gl_FragCoord.z for VRS due to a hw bug on some GFX10.3 chips. */
LLVMValueRef frag_z = values[2];
/* dFdx fine */
LLVMValueRef adjusted_frag_z = emit_ddxy(ctx, nir_op_fddx_fine, frag_z);
/* adjusted_frag_z * 0.0625 + frag_z */
adjusted_frag_z = LLVMBuildFAdd(ctx->ac.builder, frag_z,
LLVMBuildFMul(ctx->ac.builder, adjusted_frag_z,
LLVMConstReal(ctx->ac.f32, 0.0625), ""), "");
/* VRS Rate X = Ancillary[2:3] */
LLVMValueRef x_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 2, 2);
/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, x_rate, ctx->ac.i32_1, "");
values[2] = LLVMBuildSelect(ctx->ac.builder, cond, adjusted_frag_z, frag_z, "");
}
return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
}

View File

@@ -144,11 +144,6 @@ struct ac_shader_abi {
/* Clamp div by 0 (so it won't produce NaN) */
bool clamp_div_by_zero;
/* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug on
* some GFX10.3 chips.
*/
bool adjust_frag_coord_z;
/* Whether to inline the compute dispatch size in user sgprs. */
bool load_grid_size_from_user_sgpr;
};

View File

@@ -2107,7 +2107,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip;
ctx.abi.load_ring_esgs = load_ring_esgs;
ctx.abi.clamp_shadow_reference = false;
ctx.abi.adjust_frag_coord_z = options->adjust_frag_coord_z;
ctx.abi.robust_buffer_access = options->robust_buffer_access;
ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;

View File

@@ -514,6 +514,34 @@ radv_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_info *info,
progress = true;
break;
}
case nir_intrinsic_load_frag_coord: {
if (!key->adjust_frag_coord_z)
continue;
if (!(nir_ssa_def_components_read(&intrin->dest.ssa) & (1 << 2)))
continue;
nir_ssa_def *frag_z = nir_channel(&b, &intrin->dest.ssa, 2);
/* adjusted_frag_z = fddx_fine(frag_z) * 0.0625 + frag_z */
nir_ssa_def *adjusted_frag_z = nir_fddx_fine(&b, frag_z);
adjusted_frag_z = nir_ffma_imm1(&b, adjusted_frag_z, 0.0625f, frag_z);
/* VRS Rate X = Ancillary[2:3] */
nir_ssa_def *ancillary =
nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index);
nir_ssa_def *x_rate = nir_ubfe(&b, ancillary, nir_imm_int(&b, 2), nir_imm_int(&b, 2));
/* frag_z = (x_rate == 0x1) ? adjusted_frag_z : frag_z. */
nir_ssa_def *cond = nir_ieq(&b, x_rate, nir_imm_int(&b, 1));
frag_z = nir_bcsel(&b, cond, adjusted_frag_z, frag_z);
nir_ssa_def *new_dest = nir_vector_insert_imm(&b, &intrin->dest.ssa, frag_z, 2);
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest, new_dest->parent_instr);
progress = true;
break;
}
default:
break;
}
@@ -1943,7 +1971,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
options->enable_mrt_output_nan_fixup =
module && !is_meta_shader(module->nir) && options->key.ps.enable_mrt_output_nan_fixup;
options->adjust_frag_coord_z = options->key.adjust_frag_coord_z;
options->debug.func = radv_compiler_debug;
options->debug.private_data = &debug_data;

View File

@@ -115,7 +115,6 @@ struct radv_nir_compiler_options {
struct radv_pipeline_layout *layout;
struct radv_pipeline_key key;
bool robust_buffer_access;
bool adjust_frag_coord_z;
bool dump_shader;
bool dump_preoptir;
bool record_ir;

View File

@@ -519,7 +519,6 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
ctx->abi.clamp_shadow_reference = true;
ctx->abi.robust_buffer_access = true;
ctx->abi.convert_undef_to_zero = true;
ctx->abi.adjust_frag_coord_z = false;
ctx->abi.load_grid_size_from_user_sgpr = true;
const struct si_shader_info *info = &ctx->shader->selector->info;