radv,aco: lower barycentric_at_sample in NIR
fossils-db (NAVI21):
Totals from 158 (0.12% of 134913) affected shaders:
CodeSize: 569456 -> 568824 (-0.11%)

Only Control seems affected.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18615>
This commit is contained in:

committed by
Marge Bot

parent
9f0b4da875
commit
704ef1fd3b
@@ -8197,110 +8197,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
emit_split_vector(ctx, dst, 3);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_barycentric_at_sample: {
|
||||
Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
uint32_t sample_pos_offset = RING_PS_SAMPLE_POSITIONS * 16;
|
||||
if (ctx->options->key.ps.num_samples == 2) {
|
||||
sample_pos_offset += 1 << 3;
|
||||
} else if (ctx->options->key.ps.num_samples == 4) {
|
||||
sample_pos_offset += 3 << 3;
|
||||
} else if (ctx->options->key.ps.num_samples == 8) {
|
||||
sample_pos_offset += 7 << 3;
|
||||
} else {
|
||||
assert(ctx->options->key.ps.num_samples == 0);
|
||||
bld.copy(Definition(dst), bary);
|
||||
emit_split_vector(ctx, dst, 2);
|
||||
break;
|
||||
}
|
||||
|
||||
Temp sample_pos;
|
||||
Temp addr = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
nir_const_value* const_addr = nir_src_as_const_value(instr->src[0]);
|
||||
Temp private_segment_buffer = ctx->program->private_segment_buffer;
|
||||
// TODO: bounds checking?
|
||||
if (addr.type() == RegType::sgpr) {
|
||||
Operand offset;
|
||||
if (const_addr) {
|
||||
sample_pos_offset += const_addr->u32 << 3;
|
||||
offset = Operand::c32(sample_pos_offset);
|
||||
} else if (ctx->options->gfx_level >= GFX9) {
|
||||
offset = bld.sop2(aco_opcode::s_lshl3_add_u32, bld.def(s1), bld.def(s1, scc), addr,
|
||||
Operand::c32(sample_pos_offset));
|
||||
} else {
|
||||
offset = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), addr,
|
||||
Operand::c32(3u));
|
||||
offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset,
|
||||
Operand::c32(sample_pos_offset));
|
||||
}
|
||||
|
||||
Operand off = bld.copy(bld.def(s1), Operand(offset));
|
||||
sample_pos =
|
||||
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, off);
|
||||
|
||||
} else if (ctx->options->gfx_level >= GFX9) {
|
||||
addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
|
||||
sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr,
|
||||
private_segment_buffer, sample_pos_offset);
|
||||
} else if (ctx->options->gfx_level >= GFX7) {
|
||||
/* addr += private_segment_buffer + sample_pos_offset */
|
||||
Temp tmp0 = bld.tmp(s1);
|
||||
Temp tmp1 = bld.tmp(s1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(tmp0), Definition(tmp1),
|
||||
private_segment_buffer);
|
||||
Definition scc_tmp = bld.def(s1, scc);
|
||||
tmp0 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), scc_tmp, tmp0,
|
||||
Operand::c32(sample_pos_offset));
|
||||
tmp1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), tmp1,
|
||||
Operand::zero(), bld.scc(scc_tmp.getTemp()));
|
||||
addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
|
||||
Temp pck0 = bld.tmp(v1);
|
||||
Temp carry = bld.vadd32(Definition(pck0), tmp0, addr, true).def(1).getTemp();
|
||||
tmp1 = as_vgpr(ctx, tmp1);
|
||||
Temp pck1 = bld.vop2_e64(aco_opcode::v_addc_co_u32, bld.def(v1), bld.def(bld.lm), tmp1,
|
||||
Operand::zero(), carry);
|
||||
addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), pck0, pck1);
|
||||
|
||||
/* sample_pos = flat_load_dwordx2 addr */
|
||||
sample_pos = bld.flat(aco_opcode::flat_load_dwordx2, bld.def(v2), addr, Operand(s1));
|
||||
} else {
|
||||
assert(ctx->options->gfx_level == GFX6);
|
||||
|
||||
uint32_t rsrc_conf = S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||
Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
|
||||
Operand::zero(), Operand::c32(rsrc_conf));
|
||||
|
||||
addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr);
|
||||
addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), addr, Operand::zero());
|
||||
|
||||
sample_pos = bld.tmp(v2);
|
||||
|
||||
aco_ptr<MUBUF_instruction> load{create_instruction<MUBUF_instruction>(
|
||||
aco_opcode::buffer_load_dwordx2, Format::MUBUF, 3, 1)};
|
||||
load->definitions[0] = Definition(sample_pos);
|
||||
load->operands[0] = Operand(rsrc);
|
||||
load->operands[1] = Operand(addr);
|
||||
load->operands[2] = Operand::zero();
|
||||
load->offset = sample_pos_offset;
|
||||
load->offen = 0;
|
||||
load->addr64 = true;
|
||||
load->glc = false;
|
||||
load->dlc = false;
|
||||
load->disable_wqm = false;
|
||||
ctx->block->instructions.emplace_back(std::move(load));
|
||||
}
|
||||
|
||||
/* sample_pos -= 0.5 */
|
||||
Temp pos1 = bld.tmp(RegClass(sample_pos.type(), 1));
|
||||
Temp pos2 = bld.tmp(RegClass(sample_pos.type(), 1));
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), sample_pos);
|
||||
pos1 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos1, Operand::c32(0x3f000000u));
|
||||
pos2 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos2, Operand::c32(0x3f000000u));
|
||||
|
||||
emit_interp_center(ctx, dst, bary, pos1, pos2);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_barycentric_at_offset: {
|
||||
Temp offset = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
RegClass rc = RegClass(offset.type(), 1);
|
||||
|
@@ -170,7 +170,6 @@ struct aco_stage_input {
|
||||
|
||||
struct {
|
||||
uint32_t col_format;
|
||||
uint8_t num_samples;
|
||||
|
||||
/* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */
|
||||
bool alpha_to_coverage_via_mrtz;
|
||||
|
@@ -145,7 +145,6 @@ radv_aco_convert_pipe_key(struct aco_stage_input *aco_info,
|
||||
ASSIGN_FIELD_CP(vs.vertex_binding_align);
|
||||
ASSIGN_FIELD(tcs.tess_input_vertices);
|
||||
ASSIGN_FIELD(ps.col_format);
|
||||
ASSIGN_FIELD(ps.num_samples);
|
||||
ASSIGN_FIELD(ps.alpha_to_coverage_via_mrtz);
|
||||
}
|
||||
|
||||
|
@@ -190,47 +190,6 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Map a sample count to an offset expressed in sample-position entries.
 *
 * Sample counts of 2, 4 and 8 map to offsets 1, 3 and 7 respectively; every
 * other value (including 0 and 1) maps to 0.
 *
 * \param num_samples  Number of samples per pixel.
 * \return Offset to add to a sample index.
 */
static uint32_t
radv_get_sample_pos_offset(uint32_t num_samples)
{
   switch (num_samples) {
   case 2:
      return 1;
   case 4:
      return 3;
   case 8:
      return 7;
   default:
      /* Unknown or single-sample configurations use no offset. */
      return 0;
   }
}
|
||||
|
||||
static LLVMValueRef
|
||||
load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
|
||||
{
|
||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
||||
|
||||
LLVMValueRef result;
|
||||
LLVMValueRef index = LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false);
|
||||
LLVMValueRef ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ring_offsets, &index, 1, "");
|
||||
|
||||
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ac_array_in_const_addr_space(ctx->ac.v2f32), "");
|
||||
|
||||
uint32_t sample_pos_offset = radv_get_sample_pos_offset(ctx->options->key.ps.num_samples);
|
||||
|
||||
sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id,
|
||||
LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), "");
|
||||
result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
|
||||
LLVMValueRef *addrs)
|
||||
@@ -1444,8 +1403,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
||||
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
|
||||
ctx.abi.load_inputs = radv_load_vs_inputs;
|
||||
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
ctx.abi.load_sample_position = load_sample_position;
|
||||
}
|
||||
|
||||
if (shader_idx && !(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg)) {
|
||||
|
@@ -595,6 +595,30 @@ radv_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage *fs_s
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_barycentric_at_sample: {
|
||||
nir_ssa_def *new_dest;
|
||||
|
||||
if (!key->ps.num_samples) {
|
||||
new_dest =
|
||||
nir_load_barycentric_pixel(&b, 32,
|
||||
.interp_mode = nir_intrinsic_interp_mode(intrin));
|
||||
} else {
|
||||
nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa);
|
||||
|
||||
/* sample_pos -= 0.5 */
|
||||
sample_pos = nir_fsub(&b, sample_pos, nir_imm_float(&b, 0.5f));
|
||||
|
||||
new_dest =
|
||||
nir_load_barycentric_at_offset(&b, 32, sample_pos,
|
||||
.interp_mode = nir_intrinsic_interp_mode(intrin));
|
||||
}
|
||||
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
|
||||
nir_instr_remove(instr);
|
||||
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
Reference in New Issue
Block a user