radv,aco: lower barycentric_at_sample in NIR

fossils-db (NAVI21):
Totals from 158 (0.12% of 134913) affected shaders:
CodeSize: 569456 -> 568824 (-0.11%)

Only Control seems affected.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18615>
This commit is contained in:
Samuel Pitoiset
2022-09-15 14:58:51 +02:00
committed by Marge Bot
parent 9f0b4da875
commit 704ef1fd3b
5 changed files with 24 additions and 149 deletions

View File

@@ -8197,110 +8197,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
emit_split_vector(ctx, dst, 3);
break;
}
/* Barycentrics evaluated at a given sample: load that sample's position (two dwords) relative to
 * private_segment_buffer, subtract 0.5 from both components and pass the result to
 * emit_interp_center() together with the unmodified barycentrics.
 */
case nir_intrinsic_load_barycentric_at_sample: {
Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
/* Start at byte offset RING_PS_SAMPLE_POSITIONS * 16, then skip (num_samples - 1) entries of
 * 8 bytes each for the supported sample counts (2, 4, 8).
 * NOTE(review): presumably the positions for the lower sample counts are stored first in the
 * table - confirm against the code that fills it.
 */
uint32_t sample_pos_offset = RING_PS_SAMPLE_POSITIONS * 16;
if (ctx->options->key.ps.num_samples == 2) {
sample_pos_offset += 1 << 3;
} else if (ctx->options->key.ps.num_samples == 4) {
sample_pos_offset += 3 << 3;
} else if (ctx->options->key.ps.num_samples == 8) {
sample_pos_offset += 7 << 3;
} else {
/* Only num_samples == 0 is expected here: no position lookup is done and dst is a plain copy
 * of bary.
 */
assert(ctx->options->key.ps.num_samples == 0);
bld.copy(Definition(dst), bary);
emit_split_vector(ctx, dst, 2);
break;
}
Temp sample_pos;
/* src[0] selects the 8-byte entry to load; every path below scales it by 8 (<< 3). */
Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
nir_const_value* const_addr = nir_src_as_const_value(instr->src[0]);
Temp private_segment_buffer = ctx->program->private_segment_buffer;
// TODO: bounds checking?
if (addr.type() == RegType::sgpr) {
/* Index lives in an SGPR: compute a scalar byte offset and use a scalar load. */
Operand offset;
if (const_addr) {
/* Constant index: fold it into the immediate offset. */
sample_pos_offset += const_addr->u32 << 3;
offset = Operand::c32(sample_pos_offset);
} else if (ctx->options->gfx_level >= GFX9) {
/* GFX9+: shift-by-3 and add in a single SALU instruction. */
offset = bld.sop2(aco_opcode::s_lshl3_add_u32, bld.def(s1), bld.def(s1, scc), addr,
Operand::c32(sample_pos_offset));
} else {
/* Older chips: separate shift and add. */
offset = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), addr,
Operand::c32(3u));
offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
Operand::c32(sample_pos_offset));
}
Operand off = bld.copy(bld.def(s1), Operand(offset));
sample_pos =
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, off);
} else if (ctx->options->gfx_level >= GFX9) {
/* Index lives in a VGPR, GFX9+: global load that takes the scaled index,
 * private_segment_buffer and sample_pos_offset.
 */
addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr,
private_segment_buffer, sample_pos_offset);
} else if (ctx->options->gfx_level >= GFX7) {
/* Index lives in a VGPR, GFX7-GFX8: build the full 64-bit address by hand and use a flat
 * load. The constant offset is added to the two halves of private_segment_buffer on the SALU
 * (add + add-with-carry), then the scaled index is added on the VALU, again with carry.
 */
/* addr += private_segment_buffer + sample_pos_offset */
Temp tmp0 = bld.tmp(s1);
Temp tmp1 = bld.tmp(s1);
bld.pseudo(aco_opcode::p_split_vector, Definition(tmp0), Definition(tmp1),
private_segment_buffer);
Definition scc_tmp = bld.def(s1, scc);
tmp0 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), scc_tmp, tmp0,
Operand::c32(sample_pos_offset));
tmp1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), tmp1,
Operand::zero(), bld.scc(scc_tmp.getTemp()));
addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
Temp pck0 = bld.tmp(v1);
Temp carry = bld.vadd32(Definition(pck0), tmp0, addr, true).def(1).getTemp();
tmp1 = as_vgpr(ctx, tmp1);
Temp pck1 = bld.vop2_e64(aco_opcode::v_addc_co_u32, bld.def(v1), bld.def(bld.lm), tmp1,
Operand::zero(), carry);
addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), pck0, pck1);
/* sample_pos = flat_load_dwordx2 addr */
sample_pos = bld.flat(aco_opcode::flat_load_dwordx2, bld.def(v2), addr, Operand(s1));
} else {
/* Index lives in a VGPR, GFX6: MUBUF load in addr64 mode. The resource is assembled from
 * private_segment_buffer, a zero dword and rsrc_conf; the vector address is the scaled index
 * extended with a zero high dword, and sample_pos_offset goes into the instruction's
 * immediate offset field.
 */
assert(ctx->options->gfx_level == GFX6);
uint32_t rsrc_conf = S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
Operand::zero(), Operand::c32(rsrc_conf));
addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), addr, Operand::zero());
sample_pos = bld.tmp(v2);
aco_ptr<MUBUF_instruction> load{create_instruction<MUBUF_instruction>(
aco_opcode::buffer_load_dwordx2, Format::MUBUF, 3, 1)};
load->definitions[0] = Definition(sample_pos);
load->operands[0] = Operand(rsrc);
load->operands[1] = Operand(addr);
load->operands[2] = Operand::zero();
load->offset = sample_pos_offset;
load->offen = 0;
load->addr64 = true;
load->glc = false;
load->dlc = false;
load->disable_wqm = false;
ctx->block->instructions.emplace_back(std::move(load));
}
/* sample_pos -= 0.5 */
/* Split the loaded pair into its two components; 0x3f000000 is the bit pattern of 0.5f. */
Temp pos1 = bld.tmp(RegClass(sample_pos.type(), 1));
Temp pos2 = bld.tmp(RegClass(sample_pos.type(), 1));
bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), sample_pos);
pos1 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos1, Operand::c32(0x3f000000u));
pos2 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos2, Operand::c32(0x3f000000u));
/* Interpolate using the two adjusted components as the offset from the center barycentrics. */
emit_interp_center(ctx, dst, bary, pos1, pos2);
break;
}
case nir_intrinsic_load_barycentric_at_offset: {
Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
RegClass rc = RegClass(offset.type(), 1);

View File

@@ -170,7 +170,6 @@ struct aco_stage_input {
struct {
uint32_t col_format;
uint8_t num_samples;
/* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */
bool alpha_to_coverage_via_mrtz;

View File

@@ -145,7 +145,6 @@ radv_aco_convert_pipe_key(struct aco_stage_input *aco_info,
ASSIGN_FIELD_CP(vs.vertex_binding_align);
ASSIGN_FIELD(tcs.tess_input_vertices);
ASSIGN_FIELD(ps.col_format);
ASSIGN_FIELD(ps.num_samples);
ASSIGN_FIELD(ps.alpha_to_coverage_via_mrtz);
}

View File

@@ -190,47 +190,6 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has
}
}
/**
 * Map a sample count to the offset that is added to a sample index before the
 * sample-position table is indexed.
 *
 * \param num_samples  Number of samples taken from the pipeline key.
 * \return num_samples - 1 for the sample counts 2, 4 and 8; 0 for every other value.
 */
static uint32_t
radv_get_sample_pos_offset(uint32_t num_samples)
{
   /* Only these sample counts have a non-zero offset. */
   if (num_samples == 2 || num_samples == 4 || num_samples == 8)
      return num_samples - 1;

   return 0;
}
/**
 * ABI callback that loads the position of one sample.
 *
 * \param abi        Shader ABI; the enclosing radv_shader_context is recovered from it.
 * \param sample_id  i32 index of the sample whose position is wanted.
 * \return The value produced by an invariant load from the sample-position array.
 */
static LLVMValueRef
load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
{
   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);

   /* Take the RING_PS_SAMPLE_POSITIONS element of ring_offsets and cast the resulting
    * pointer to a const-address-space array of v2f32.
    */
   LLVMValueRef ring_slot = LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false);
   LLVMValueRef positions =
      LLVMBuildGEP(ctx->ac.builder, ctx->ring_offsets, &ring_slot, 1, "");
   positions = LLVMBuildBitCast(ctx->ac.builder, positions,
                                ac_array_in_const_addr_space(ctx->ac.v2f32), "");

   /* Bias the sample index by the offset that belongs to the current sample count. */
   uint32_t first_entry = radv_get_sample_pos_offset(ctx->options->key.ps.num_samples);
   LLVMValueRef bias = LLVMConstInt(ctx->ac.i32, first_entry, false);
   LLVMValueRef entry = LLVMBuildAdd(ctx->ac.builder, sample_id, bias, "");

   return ac_build_load_invariant(&ctx->ac, positions, entry);
}
static void
visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
LLVMValueRef *addrs)
@@ -1444,8 +1403,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) {
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
ctx.abi.load_inputs = radv_load_vs_inputs;
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) {
ctx.abi.load_sample_position = load_sample_position;
}
if (shader_idx && !(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg)) {

View File

@@ -595,6 +595,30 @@ radv_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage *fs_s
progress = true;
break;
}
case nir_intrinsic_load_barycentric_at_sample: {
   /* Rewrite the at_sample barycentric load in terms of other barycentric intrinsics,
    * keeping the interpolation mode of the original instruction.
    */
   nir_ssa_def *bary;

   if (key->ps.num_samples) {
      /* Load the position of the requested sample and subtract 0.5 from it before
       * using it as the at_offset operand.
       */
      nir_ssa_def *offset = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa);
      offset = nir_fsub(&b, offset, nir_imm_float(&b, 0.5f));

      bary = nir_load_barycentric_at_offset(&b, 32, offset,
                                            .interp_mode = nir_intrinsic_interp_mode(intrin));
   } else {
      /* No sample count in the pipeline key: use the pixel barycentrics instead. */
      bary = nir_load_barycentric_pixel(&b, 32,
                                        .interp_mode = nir_intrinsic_interp_mode(intrin));
   }

   /* Redirect every use to the replacement, then drop the original instruction. */
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, bary);
   nir_instr_remove(instr);
   progress = true;
   break;
}
default:
break;
}