r600/sfn: lower IO for FS inputs and handle interpolation accordingly

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7715>
This commit is contained in:
Gert Wollny
2020-11-21 16:22:11 +01:00
committed by Marge Bot
parent a45e651b11
commit 3b6c17e296
4 changed files with 430 additions and 28 deletions

View File

@@ -1200,6 +1200,7 @@ const struct nir_shader_compiler_options r600_nir_fs_options = {
.vectorize_io = true,
.has_umad24 = true,
.has_umul24 = true,
.use_interpolated_input_intrinsics = true
};
const struct nir_shader_compiler_options r600_nir_options = {

View File

@@ -99,6 +99,10 @@ bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shade
const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
sfn_log << SfnLog::trans << "Scan shader\n";
if (sfn_log.has_debug_flag(SfnLog::instr))
nir_print_shader(const_cast<nir_shader *>(shader), stderr);
nir_foreach_block(block, func->impl) {
nir_foreach_instr(instr, block) {
if (!impl->scan_instruction(instr)) {
@@ -824,8 +828,13 @@ int r600_shader_from_nir(struct r600_context *rctx,
if (sel->nir->info.stage == MESA_SHADER_VERTEX)
NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size,
(nir_lower_io_options)
(nir_lower_io_lower_64bit_to_32));
NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);
NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
}
if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
(sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {

View File

@@ -47,8 +47,6 @@ FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
m_need_back_color(false),
m_front_face_loaded(false),
m_depth_exports(0),
m_enable_centroid_interpolators(false),
m_enable_sample_interpolators(false),
m_apply_sample_mask(key.ps.apply_sample_id_mask),
m_dual_source_blend(key.ps.dual_source_blend)
{
@@ -111,11 +109,168 @@ bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
}
}
unsigned barycentric_ij_index(nir_intrinsic_instr *instr)
{
unsigned index = 0;
switch (instr->intrinsic) {
case nir_intrinsic_load_barycentric_sample:
index = 0;
break;
case nir_intrinsic_load_barycentric_at_sample:
case nir_intrinsic_load_barycentric_at_offset:
case nir_intrinsic_load_barycentric_pixel:
index = 1;
break;
case nir_intrinsic_load_barycentric_centroid:
index = 2;
break;
default:
unreachable("Unknown interpolator intrinsic");
}
switch (nir_intrinsic_interp_mode(instr)) {
case INTERP_MODE_NONE:
case INTERP_MODE_SMOOTH:
case INTERP_MODE_COLOR:
return index;
break;
case INTERP_MODE_NOPERSPECTIVE:
return index + 3;
break;
case INTERP_MODE_FLAT:
case INTERP_MODE_EXPLICIT:
default:
assert(0 && "unknown/unsupported mode for load_interpolated");
}
}
bool FragmentShaderFromNir::process_load_input(nir_intrinsic_instr *instr,
bool interpolated)
{
sfn_log << SfnLog::io << "Parse " << instr->instr
<< "\n";
auto index = nir_src_as_const_value(instr->src[interpolated ? 1 : 0]);
assert(index);
unsigned location = nir_intrinsic_io_semantics(instr).location + index->u32;
auto semantic = r600_get_varying_semantic(location);
tgsi_semantic name = (tgsi_semantic)semantic.first;
unsigned sid = semantic.second;
if (location == VARYING_SLOT_POS) {
m_sv_values.set(es_pos);
return true;
}
if (location == VARYING_SLOT_FACE) {
m_sv_values.set(es_face);
return true;
}
tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
bool uses_interpol_at_centroid = false;
if (interpolated) {
glsl_interp_mode mode = INTERP_MODE_NONE;
auto parent = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent);
switch (parent->intrinsic) {
case nir_intrinsic_load_barycentric_sample:
tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
break;
case nir_intrinsic_load_barycentric_at_sample:
case nir_intrinsic_load_barycentric_at_offset:
case nir_intrinsic_load_barycentric_pixel:
tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
break;
case nir_intrinsic_load_barycentric_centroid:
tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
uses_interpol_at_centroid = true;
break;
default:
std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of "
<< nir_intrinsic_infos[instr->intrinsic].name
<< " interpolator?\n";
assert(0);
}
switch (mode) {
case INTERP_MODE_NONE:
if (name == TGSI_SEMANTIC_COLOR) {
tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
break;
}
/* fallthrough */
case INTERP_MODE_SMOOTH:
tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
break;
case INTERP_MODE_NOPERSPECTIVE:
tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
break;
case INTERP_MODE_FLAT:
break;
case INTERP_MODE_COLOR:
tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
break;
case INTERP_MODE_EXPLICIT:
default:
assert(0);
}
m_interpolators_used.set(barycentric_ij_index(parent));
}
switch (name) {
case TGSI_SEMANTIC_COLOR: {
m_shaderio.add_input(new ShaderInputColor(name, sid,
nir_intrinsic_base(instr) + index->u32,
nir_intrinsic_component(instr),
nir_dest_num_components(instr->dest),
tgsi_interpolate, tgsi_loc));
m_need_back_color = m_two_sided_color;
return true;
}
case TGSI_SEMANTIC_PRIMID:
sh_info().gs_prim_id_input = true;
sh_info().ps_prim_id_input = m_shaderio.inputs().size();
/* fallthrough */
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_LAYER:
case TGSI_SEMANTIC_PCOORD:
case TGSI_SEMANTIC_VIEWPORT_INDEX:
case TGSI_SEMANTIC_CLIPDIST: {
auto varying = m_shaderio.find_varying(name, sid, nir_intrinsic_component(instr));
if (!varying) {
m_shaderio.add_input(new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
nir_intrinsic_component(instr),
nir_dest_num_components(instr->dest),
tgsi_interpolate, tgsi_loc));
} else if (uses_interpol_at_centroid)
varying->set_uses_interpolate_at_centroid();
return true;
}
default:
return false;
}
}
bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
{
switch (instr->type) {
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
switch (ii->intrinsic) {
case nir_intrinsic_load_front_face:
m_sv_values.set(es_face);
@@ -129,16 +284,14 @@ bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
case nir_intrinsic_load_sample_id:
m_sv_values.set(es_sample_id);
break;
case nir_intrinsic_interp_deref_at_centroid:
/* This is not a sysvalue, should go elsewhere */
m_enable_centroid_interpolators = true;
break;
case nir_intrinsic_interp_deref_at_sample:
m_enable_sample_interpolators = true;
break;
case nir_intrinsic_load_helper_invocation:
m_sv_values.set(es_helper_invocation);
break;
case nir_intrinsic_load_input:
return process_load_input(ii, false);
case nir_intrinsic_load_interpolated_input: {
return process_load_input(ii, true);
}
default:
;
}
@@ -156,23 +309,13 @@ bool FragmentShaderFromNir::do_allocate_reserved_registers()
int face_reg_index = -1;
int sample_id_index = -1;
// enabled interpolators based on inputs
for (auto& i: m_shaderio.inputs()) {
int ij = i->ij_index();
if (ij >= 0) {
m_interpolator[ij].enabled = true;
for (unsigned i = 0; i < s_max_interpolators; ++i) {
if (m_interpolators_used.test(i)) {
sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
m_interpolator[i].enabled = true;
}
}
/* Lazy, enable both possible interpolators,
* TODO: check which ones are really needed */
if (m_enable_centroid_interpolators) {
m_interpolator[2].enabled = true; /* perspective */
m_interpolator[5].enabled = true; /* linear */
}
if (m_enable_sample_interpolators)
m_interpolator[1].enabled = true; /* perspective */
// sort the varying inputs
m_shaderio.sort_varying_inputs();
@@ -180,7 +323,7 @@ bool FragmentShaderFromNir::do_allocate_reserved_registers()
int num_baryc = 0;
for (int i = 0; i < 6; ++i) {
if (m_interpolator[i].enabled) {
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
m_interpolator[i].ij_index = num_baryc;
@@ -400,11 +543,248 @@ bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_in
return emit_load_sample_pos(instr);
case nir_intrinsic_load_helper_invocation:
return load_preloaded_value(instr->dest, 0, m_helper_invocation);
case nir_intrinsic_load_input:
return emit_load_input(instr);
case nir_intrinsic_load_barycentric_sample:
case nir_intrinsic_load_barycentric_pixel:
case nir_intrinsic_load_barycentric_centroid: {
unsigned ij = barycentric_ij_index(instr);
return load_preloaded_value(instr->dest, 0, m_interpolator[ij].i) &&
load_preloaded_value(instr->dest, 1, m_interpolator[ij].j);
}
case nir_intrinsic_load_barycentric_at_offset:
return load_barycentric_at_offset(instr);
case nir_intrinsic_load_barycentric_at_sample:
return load_barycentric_at_sample(instr);
case nir_intrinsic_load_interpolated_input: {
return emit_load_interpolated_input(instr);
}
default:
return false;
}
}
bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr)
{
unsigned loc = nir_intrinsic_io_semantics(instr).location;
switch (loc) {
case VARYING_SLOT_POS:
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
load_preloaded_value(instr->dest, i, m_frag_pos[i]);
}
return true;
case VARYING_SLOT_FACE:
return load_preloaded_value(instr->dest, 0, m_front_face_reg);
default:
;
}
auto param = nir_src_as_const_value(instr->src[1]);
assert(param && "Indirect PS inputs not (yet) supported");
auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
io.set_gpr(dst.sel());
Interpolator ip = {true, 0, from_nir(instr->src[0], 0), from_nir(instr->src[0], 1)};
if (!load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
nir_intrinsic_component(instr)))
return false;
if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
auto & color_input = static_cast<ShaderInputColor&> (io);
auto& bgio = m_shaderio.input(color_input.back_color_input_index());
bgio.set_gpr(allocate_temp_register());
GPRVector bgcol(bgio.gpr(), {0,1,2,3});
load_interpolated(bgcol, bgio, ip, nir_dest_num_components(instr->dest), 0);
load_front_face();
AluInstruction *ir = nullptr;
for (unsigned i = 0; i < 4 ; ++i) {
ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
emit_instruction(ir);
}
if (ir)
ir->set_flag(alu_last_instr);
}
AluInstruction *ir = nullptr;
if (nir_intrinsic_component(instr) != 0) {
std::cerr << "move in right pos \n";
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
emit_instruction(ir);
}
if (ir)
ir->set_flag(alu_last_instr);
}
return true;
}
bool FragmentShaderFromNir::load_barycentric_at_offset(nir_intrinsic_instr* instr)
{
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
PValue dummy(new GPRValue(interpolator.i->sel(), 0));
GPRVector help = get_temp_vec4();
GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
getgradh->set_dest_swizzle({0,1,7,7});
getgradh->set_flag(TexInstruction::x_unnormalized);
getgradh->set_flag(TexInstruction::y_unnormalized);
getgradh->set_flag(TexInstruction::z_unnormalized);
getgradh->set_flag(TexInstruction::w_unnormalized);
getgradh->set_flag(TexInstruction::grad_fine);
emit_instruction(getgradh);
auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
getgradv->set_dest_swizzle({7,7,0,1});
getgradv->set_flag(TexInstruction::x_unnormalized);
getgradv->set_flag(TexInstruction::y_unnormalized);
getgradv->set_flag(TexInstruction::z_unnormalized);
getgradv->set_flag(TexInstruction::w_unnormalized);
getgradv->set_flag(TexInstruction::grad_fine);
emit_instruction(getgradv);
PValue ofs_x = from_nir(instr->src[0], 0);
PValue ofs_y = from_nir(instr->src[0], 1);
emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write}));
emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write, alu_last_instr}));
return true;
}
bool FragmentShaderFromNir::load_barycentric_at_sample(nir_intrinsic_instr* instr)
{
GPRVector slope = get_temp_vec4();
auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
from_nir_with_fetch_constant(instr->src[0], 0),
0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
fetch->set_flag(vtx_srf_mode);
emit_instruction(fetch);
GPRVector grad = get_temp_vec4();
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
assert(interpolator.enabled);
PValue dummy(new GPRValue(interpolator.i->sel(), 0));
GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
tex->set_flag(TexInstruction::grad_fine);
tex->set_flag(TexInstruction::x_unnormalized);
tex->set_flag(TexInstruction::y_unnormalized);
tex->set_flag(TexInstruction::z_unnormalized);
tex->set_flag(TexInstruction::w_unnormalized);
tex->set_dest_swizzle({0,1,7,7});
emit_instruction(tex);
tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
tex->set_flag(TexInstruction::x_unnormalized);
tex->set_flag(TexInstruction::y_unnormalized);
tex->set_flag(TexInstruction::z_unnormalized);
tex->set_flag(TexInstruction::w_unnormalized);
tex->set_flag(TexInstruction::grad_fine);
tex->set_dest_swizzle({7,7,0,1});
emit_instruction(tex);
emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write}));
emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write, alu_last_instr}));
return true;
}
bool FragmentShaderFromNir::emit_load_input(nir_intrinsic_instr* instr)
{
unsigned loc = nir_intrinsic_io_semantics(instr).location;
auto param = nir_src_as_const_value(instr->src[0]);
assert(param && "Indirect PS inputs not (yet) supported");
auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
unsigned num_components = nir_dest_num_components(instr->dest);
switch (loc) {
case VARYING_SLOT_POS:
for (unsigned i = 0; i < num_components; ++i) {
load_preloaded_value(instr->dest, i, m_frag_pos[i]);
}
return true;
case VARYING_SLOT_FACE:
return load_preloaded_value(instr->dest, 0, m_front_face_reg);
default:
;
}
auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
AluInstruction *ir = nullptr;
for (unsigned i = 0; i < 4 ; ++i) {
ir = new AluInstruction(op1_interp_load_p0, dst[i],
PValue(new InlineConstValue(ALU_SRC_PARAM_BASE +
io.lds_pos(), i)),
EmitInstruction::write);
emit_instruction(ir);
}
ir->set_flag(alu_last_instr);
/* TODO: back color */
if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
Interpolator ip = {false, 0, NULL, NULL};
auto & color_input = static_cast<ShaderInputColor&> (io);
auto& bgio = m_shaderio.input(color_input.back_color_input_index());
bgio.set_gpr(allocate_temp_register());
GPRVector bgcol(bgio.gpr(), {0,1,2,3});
load_interpolated(bgcol, bgio, ip, num_components, 0);
load_front_face();
AluInstruction *ir = nullptr;
for (unsigned i = 0; i < 4 ; ++i) {
ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
emit_instruction(ir);
}
if (ir)
ir->set_flag(alu_last_instr);
}
if (nir_intrinsic_component(instr) != 0) {
std::cerr << "move in right pos \n";
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
emit_instruction(ir);
}
if (ir)
ir->set_flag(alu_last_instr);
}
return true;
}
void FragmentShaderFromNir::load_front_face()
{
assert(m_front_face_reg);
@@ -656,7 +1036,7 @@ bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
// replace io with ShaderInputVarying
if (io.interpolate() > 0) {
sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
sfn_log << SfnLog::io << "Using Interpolator (" << *ip.j << ", " << *ip.i << ")" << "\n";
if (num_components == 1) {
switch (start_comp) {
@@ -727,6 +1107,8 @@ bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderIn
const Interpolator& ip, EAluOp op, int writemask)
{
AluInstruction *ir = nullptr;
assert(ip.j);
assert(ip.i);
for (unsigned i = 0; i < 4 ; ++i) {
ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
(writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);

View File

@@ -68,6 +68,7 @@ private:
void load_front_face();
bool emit_load_input(nir_intrinsic_instr* instr);
bool emit_load_front_face(nir_intrinsic_instr* instr);
bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
bool emit_load_sample_pos(nir_intrinsic_instr* instr);
@@ -75,6 +76,12 @@ private:
bool emit_interp_deref_at_sample(nir_intrinsic_instr* instr);
bool emit_interp_deref_at_offset(nir_intrinsic_instr* instr);
bool process_load_input(nir_intrinsic_instr *instr, bool interpolated);
bool emit_load_interpolated_input(nir_intrinsic_instr* instr);
bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
unsigned m_max_color_exports;
unsigned m_max_counted_color_exports;
bool m_two_sided_color;
@@ -96,8 +103,11 @@ private:
unsigned m_depth_exports;
std::map<unsigned, PValue> m_input_cache;
bool m_enable_centroid_interpolators;
bool m_enable_sample_interpolators;
static const int s_max_interpolators = 6;
std::bitset<s_max_interpolators> m_interpolators_used;
unsigned m_apply_sample_mask;
bool m_dual_source_blend;
};