r600/sfn: lowered FS output IO

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9874>
This commit is contained in:
Gert Wollny
2021-03-21 22:45:06 +01:00
committed by Marge Bot
parent 3bbc078025
commit 84f5b15978
3 changed files with 155 additions and 10 deletions

View File

@@ -895,9 +895,16 @@ int r600_shader_from_nir(struct r600_context *rctx,
NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16); NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
if (sel->nir->info.stage == MESA_SHADER_VERTEX)
NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
}
nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in; nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in;
if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
io_modes |= nir_var_shader_out; io_modes |= nir_var_shader_out;
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) { if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
@@ -921,6 +928,9 @@ int r600_shader_from_nir(struct r600_context *rctx,
NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size, NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
nir_lower_io_lower_64bit_to_32); nir_lower_io_lower_64bit_to_32);
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);
/**/ /**/
if (lower_64bit) if (lower_64bit)
NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10); NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10);
@@ -938,15 +948,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
NIR_PASS_V(sel->nir, nir_copy_prop); NIR_PASS_V(sel->nir, nir_copy_prop);
NIR_PASS_V(sel->nir, nir_opt_dce); NIR_PASS_V(sel->nir, nir_opt_dce);
if (sel->nir->info.stage == MESA_SHADER_VERTEX) auto sh = nir_shader_clone(sel->nir, sel->nir);
NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);
if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);
NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
}
auto sh = nir_shader_clone(sel->nir, sel->nir);
if (sh->info.stage == MESA_SHADER_TESS_CTRL || if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
sh->info.stage == MESA_SHADER_TESS_EVAL || sh->info.stage == MESA_SHADER_TESS_EVAL ||

View File

@@ -271,6 +271,9 @@ bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
case nir_intrinsic_load_interpolated_input: { case nir_intrinsic_load_interpolated_input: {
return process_load_input(ii, true); return process_load_input(ii, true);
} }
case nir_intrinsic_store_output:
return process_store_output(ii);
default: default:
; ;
} }
@@ -444,6 +447,48 @@ bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir
return false; return false;
} }
/* Scan-time bookkeeping for a lowered store_output intrinsic.
 *
 * Registers the written output slot in the shader info (output count,
 * name/sid, write mask) and keeps track of how many color stores were seen.
 *
 * Returns true when the store targets a color, data, depth, stencil or
 * sample-mask result, and false for any other location. */
bool FragmentShaderFromNir::process_store_output(nir_intrinsic_instr *instr)
{
   const auto sem = nir_intrinsic_io_semantics(instr);
   const unsigned slot = nir_intrinsic_base(instr);

   /* Make sure the recorded number of outputs covers this slot. */
   if (slot >= sh_info().noutput)
      sh_info().noutput = slot + 1;

   r600_shader_io& out = sh_info().output[slot];

   /* The helper receives pointers to name and sid; presumably it fills them
    * in from the frag result location. */
   tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>(sem.location),
                                    &out.name, &out.sid);

   /* Accumulate the written channels, shifted by the start component. */
   const unsigned first_comp = nir_intrinsic_component(instr);
   out.write_mask |= nir_intrinsic_write_mask(instr) << first_comp;

   const bool is_generic_color = (sem.location == FRAG_RESULT_COLOR);
   const bool is_data_output = (sem.location >= FRAG_RESULT_DATA0 &&
                                sem.location <= FRAG_RESULT_DATA7);

   if (is_generic_color && !m_dual_source_blend)
      sh_info().fs_write_all = true;

   if (is_generic_color || is_data_output) {
      /* As soon as a second color store shows up, fs_write_all is cleared
       * again. */
      if (++m_max_counted_color_exports > 1)
         sh_info().fs_write_all = false;
      return true;
   }

   switch (sem.location) {
   case FRAG_RESULT_DEPTH:
   case FRAG_RESULT_STENCIL:
   case FRAG_RESULT_SAMPLE_MASK:
      /* These results always get the full four-channel mask. */
      out.write_mask = 15;
      return true;
   default:
      return false;
   }
}
bool FragmentShaderFromNir::do_process_outputs(nir_variable *output) bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
{ {
sfn_log << SfnLog::io << "Parse output variable " sfn_log << SfnLog::io << "Parse output variable "
@@ -537,11 +582,33 @@ bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_in
case nir_intrinsic_load_interpolated_input: { case nir_intrinsic_load_interpolated_input: {
return emit_load_interpolated_input(instr); return emit_load_interpolated_input(instr);
} }
case nir_intrinsic_store_output:
return emit_store_output(instr);
default: default:
return false; return false;
} }
} }
/* Dispatch a lowered store_output intrinsic to the pixel export emitter.
 *
 * FRAG_RESULT_COLOR is exported once when dual source blending is enabled
 * and m_max_color_exports times otherwise; DATA0..DATA7, depth, stencil and
 * sample mask are exported exactly once. Any other location is logged as
 * unimplemented and false is returned. */
bool FragmentShaderFromNir::emit_store_output(nir_intrinsic_instr* instr)
{
   const auto loc = nir_intrinsic_io_semantics(instr).location;

   if (loc == FRAG_RESULT_COLOR) {
      const int num_exports = m_dual_source_blend ? 1 : m_max_color_exports;
      return emit_export_pixel(instr, num_exports);
   }

   const bool is_data_output = (loc >= FRAG_RESULT_DATA0 &&
                                loc <= FRAG_RESULT_DATA7);
   const bool is_depth_stencil_or_mask = (loc == FRAG_RESULT_DEPTH ||
                                          loc == FRAG_RESULT_STENCIL ||
                                          loc == FRAG_RESULT_SAMPLE_MASK);

   if (is_data_output || is_depth_stencil_or_mask)
      return emit_export_pixel(instr, 1);

   sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << loc << ")\n";
   return false;
}
bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr) bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr)
{ {
unsigned loc = nir_intrinsic_io_semantics(instr).location; unsigned loc = nir_intrinsic_io_semantics(instr).location;
@@ -907,6 +974,77 @@ bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
} }
/* Emit the export instruction(s) for a lowered store_output intrinsic.
 *
 * instr:   the store_output intrinsic; src[0] holds the value to export.
 * outputs: how many consecutive color exports to emit for a color/data
 *          store (ignored for depth, stencil and sample mask).
 *
 * The source value is loaded and registered via set_output() for every
 * location. Returns true if the location is a color/data or a
 * depth/stencil/sample-mask result, false otherwise. */
bool FragmentShaderFromNir::emit_export_pixel(nir_intrinsic_instr* instr, int outputs)
{
   const auto sem = nir_intrinsic_io_semantics(instr);
   const unsigned base = nir_intrinsic_base(instr);
   unsigned channels = nir_intrinsic_write_mask(instr);
   std::array<uint32_t,4> swz;

   /* Depth, stencil and sample mask each take component 0 of the source and
    * place it in their own channel (x, y and z respectively); everything
    * else uses an identity swizzle padded with 7 beyond num_components. */
   if (sem.location == FRAG_RESULT_DEPTH) {
      channels = 1;
      swz = {0,7,7,7};
   } else if (sem.location == FRAG_RESULT_STENCIL) {
      channels = 2;
      swz = {7,0,7,7};
   } else if (sem.location == FRAG_RESULT_SAMPLE_MASK) {
      channels = 4;
      swz = {7,7,0,7};
   } else {
      for (unsigned c = 0; c < 4; ++c)
         swz[c] = (c < instr->num_components) ? c : 7;
   }

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], channels, swz);
   set_output(base, value.sel());

   const bool is_generic_color = (sem.location == FRAG_RESULT_COLOR);
   const bool is_color_target = is_generic_color ||
                                (sem.location >= FRAG_RESULT_DATA0 &&
                                 sem.location <= FRAG_RESULT_DATA7);
   const bool is_depth_stencil_or_mask =
         (sem.location == FRAG_RESULT_DEPTH ||
          sem.location == FRAG_RESULT_STENCIL ||
          sem.location == FRAG_RESULT_SAMPLE_MASK);

   if (is_color_target) {
      for (int k = 0; k < outputs; ++k) {
         /* With dual source blending the generic color output is addressed
          * by its blend index, otherwise by the driver location. The
          * number of depth-type exports emitted so far is subtracted. */
         const unsigned first = (m_dual_source_blend && is_generic_color)
                                ? sem.dual_source_blend_index : base;
         const unsigned location = first + k - m_depth_exports;

         sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";

         if (location >= m_max_color_exports) {
            sfn_log << SfnLog::io << "Pixel output loc:" << location
                    << " dl:" << base
                    << " skipped because we have only " << m_max_color_exports << " CBs\n";
            continue;
         }

         m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);

         /* Update the export statistics kept in the shader info. */
         if (location > sh_info().ps_export_highest)
            sh_info().ps_export_highest = location;
         ++sh_info().nr_ps_color_exports;
         sh_info().ps_color_export_mask |= (0xfu << (location * 4));

         emit_export_instruction(m_last_pixel_export);
      }
      return true;
   }

   if (is_depth_stencil_or_mask) {
      m_depth_exports++;
      /* NOTE(review): 61 is presumably the hardware export slot for
       * depth/stencil/sample mask - confirm against the ISA docs. */
      emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
      return true;
   }

   return false;
}
bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs) bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs)
{ {
std::array<uint32_t,4> swizzle; std::array<uint32_t,4> swizzle;

View File

@@ -51,9 +51,14 @@ private:
void emit_shader_start() override; void emit_shader_start() override;
bool do_allocate_reserved_registers() override; bool do_allocate_reserved_registers() override;
bool do_process_outputs(nir_variable *output) override; bool do_process_outputs(nir_variable *output) override;
bool process_store_output(nir_intrinsic_instr *instr);
bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override; bool do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr) override;
bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override; bool do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr) override;
bool emit_store_output(nir_intrinsic_instr* instr);
bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs); bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs);
bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs);
bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip, bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip,
int num_components, int start_comp); int num_components, int start_comp);
bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op); bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op);