radv: lower adjusting gl_FragCoord.z for VRS in NIR
fossils-db (Sienna Cichlid):
Totals from 4432 (3.29% of 134913) affected shaders:
VGPRs: 231232 -> 231880 (+0.28%)
CodeSize: 24738224 -> 24718008 (-0.08%); split: -0.08%, +0.00%
MaxWaves: 93120 -> 93000 (-0.13%)
Instrs: 4540970 -> 4541062 (+0.00%); split: -0.01%, +0.01%
Latency: 49658353 -> 49641444 (-0.03%); split: -0.05%, +0.01%
InvThroughput: 9604328 -> 9603041 (-0.01%); split: -0.02%, +0.01%
VClause: 66497 -> 66498 (+0.00%)
SClause: 209530 -> 209532 (+0.00%); split: -0.01%, +0.01%
Copies: 276135 -> 276249 (+0.04%); split: -0.14%, +0.18%
PreSGPRs: 189409 -> 189415 (+0.00%)
PreVGPRs: 207368 -> 207458 (+0.04%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15450>
This commit is contained in:
@@ -4886,34 +4886,6 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
|
||||
bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), get_arg(ctx, ctx->args->ac.frag_pos[3]));
|
||||
}
|
||||
|
||||
if (ctx->options->adjust_frag_coord_z &&
|
||||
G_0286CC_POS_Z_FLOAT_ENA(ctx->program->config->spi_ps_input_ena)) {
|
||||
/* Adjust gl_FragCoord.z for VRS due to a hw bug on some GFX10.3 chips. */
|
||||
Operand frag_z = vec->operands[2];
|
||||
Temp adjusted_frag_z = bld.tmp(v1);
|
||||
Temp tmp;
|
||||
|
||||
/* dFdx fine */
|
||||
Temp tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), frag_z, dpp_quad_perm(0, 0, 2, 2));
|
||||
tmp = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), frag_z, tl, dpp_quad_perm(1, 1, 3, 3));
|
||||
emit_wqm(bld, tmp, adjusted_frag_z, true);
|
||||
|
||||
/* adjusted_frag_z * 0.0625 + frag_z */
|
||||
adjusted_frag_z = bld.vop3(aco_opcode::v_fma_f32, bld.def(v1), adjusted_frag_z,
|
||||
Operand::c32(0x3d800000u /* 0.0625 */), frag_z);
|
||||
|
||||
/* VRS Rate X = Ancillary[2:3] */
|
||||
Temp x_rate =
|
||||
bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary),
|
||||
Operand::c32(2u), Operand::c32(2u));
|
||||
|
||||
/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
|
||||
Temp cond =
|
||||
bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(x_rate));
|
||||
vec->operands[2] =
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), frag_z, adjusted_frag_z, cond);
|
||||
}
|
||||
|
||||
for (Operand& op : vec->operands)
|
||||
op = op.isUndefined() ? Operand::zero() : op;
|
||||
|
||||
|
@@ -3540,26 +3540,6 @@ emit_load_frag_coord(struct ac_nir_context *ctx)
|
||||
ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
|
||||
ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
|
||||
|
||||
if (ctx->abi->adjust_frag_coord_z) {
|
||||
/* Adjust gl_FragCoord.z for VRS due to a hw bug on some GFX10.3 chips. */
|
||||
LLVMValueRef frag_z = values[2];
|
||||
|
||||
/* dFdx fine */
|
||||
LLVMValueRef adjusted_frag_z = emit_ddxy(ctx, nir_op_fddx_fine, frag_z);
|
||||
|
||||
/* adjusted_frag_z * 0.0625 + frag_z */
|
||||
adjusted_frag_z = LLVMBuildFAdd(ctx->ac.builder, frag_z,
|
||||
LLVMBuildFMul(ctx->ac.builder, adjusted_frag_z,
|
||||
LLVMConstReal(ctx->ac.f32, 0.0625), ""), "");
|
||||
|
||||
/* VRS Rate X = Ancillary[2:3] */
|
||||
LLVMValueRef x_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 2, 2);
|
||||
|
||||
/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
|
||||
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, x_rate, ctx->ac.i32_1, "");
|
||||
values[2] = LLVMBuildSelect(ctx->ac.builder, cond, adjusted_frag_z, frag_z, "");
|
||||
}
|
||||
|
||||
return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
|
||||
}
|
||||
|
||||
|
@@ -144,11 +144,6 @@ struct ac_shader_abi {
|
||||
/* Clamp div by 0 (so it won't produce NaN) */
|
||||
bool clamp_div_by_zero;
|
||||
|
||||
/* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug on
|
||||
* some GFX10.3 chips.
|
||||
*/
|
||||
bool adjust_frag_coord_z;
|
||||
|
||||
/* Whether to inline the compute dispatch size in user sgprs. */
|
||||
bool load_grid_size_from_user_sgpr;
|
||||
};
|
||||
|
@@ -2107,7 +2107,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
||||
ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip;
|
||||
ctx.abi.load_ring_esgs = load_ring_esgs;
|
||||
ctx.abi.clamp_shadow_reference = false;
|
||||
ctx.abi.adjust_frag_coord_z = options->adjust_frag_coord_z;
|
||||
ctx.abi.robust_buffer_access = options->robust_buffer_access;
|
||||
ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;
|
||||
|
||||
|
@@ -514,6 +514,34 @@ radv_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_info *info,
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_frag_coord: {
|
||||
if (!key->adjust_frag_coord_z)
|
||||
continue;
|
||||
|
||||
if (!(nir_ssa_def_components_read(&intrin->dest.ssa) & (1 << 2)))
|
||||
continue;
|
||||
|
||||
nir_ssa_def *frag_z = nir_channel(&b, &intrin->dest.ssa, 2);
|
||||
|
||||
/* adjusted_frag_z = fddx_fine(frag_z) * 0.0625 + frag_z */
|
||||
nir_ssa_def *adjusted_frag_z = nir_fddx_fine(&b, frag_z);
|
||||
adjusted_frag_z = nir_ffma_imm1(&b, adjusted_frag_z, 0.0625f, frag_z);
|
||||
|
||||
/* VRS Rate X = Ancillary[2:3] */
|
||||
nir_ssa_def *ancillary =
|
||||
nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index);
|
||||
nir_ssa_def *x_rate = nir_ubfe(&b, ancillary, nir_imm_int(&b, 2), nir_imm_int(&b, 2));
|
||||
|
||||
/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
|
||||
nir_ssa_def *cond = nir_ieq(&b, x_rate, nir_imm_int(&b, 1));
|
||||
frag_z = nir_bcsel(&b, cond, adjusted_frag_z, frag_z);
|
||||
|
||||
nir_ssa_def *new_dest = nir_vector_insert_imm(&b, &intrin->dest.ssa, frag_z, 2);
|
||||
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest, new_dest->parent_instr);
|
||||
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -1943,7 +1971,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
|
||||
options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
|
||||
options->enable_mrt_output_nan_fixup =
|
||||
module && !is_meta_shader(module->nir) && options->key.ps.enable_mrt_output_nan_fixup;
|
||||
options->adjust_frag_coord_z = options->key.adjust_frag_coord_z;
|
||||
options->debug.func = radv_compiler_debug;
|
||||
options->debug.private_data = &debug_data;
|
||||
|
||||
|
@@ -115,7 +115,6 @@ struct radv_nir_compiler_options {
|
||||
struct radv_pipeline_layout *layout;
|
||||
struct radv_pipeline_key key;
|
||||
bool robust_buffer_access;
|
||||
bool adjust_frag_coord_z;
|
||||
bool dump_shader;
|
||||
bool dump_preoptir;
|
||||
bool record_ir;
|
||||
|
@@ -519,7 +519,6 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
|
||||
ctx->abi.clamp_shadow_reference = true;
|
||||
ctx->abi.robust_buffer_access = true;
|
||||
ctx->abi.convert_undef_to_zero = true;
|
||||
ctx->abi.adjust_frag_coord_z = false;
|
||||
ctx->abi.load_grid_size_from_user_sgpr = true;
|
||||
|
||||
const struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
|
Reference in New Issue
Block a user