r600/sfn; go back to not lowering uniforms to UBOs
Lowering uniforms to UBOs results in an aditional iadd for the
UBO buffer id evaluation, and for indirect buffers access that
results in an unnecessary op that can be avoided by not lowering
uniforms. There is some code duplication when reading the uniforms
but it saves a whole instruction group per indirect cont buffer
access.
This reverts commit 98eb00face
with
some additional fixes.
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6879>
This commit is contained in:
@@ -1201,7 +1201,6 @@ const struct nir_shader_compiler_options r600_nir_fs_options = {
|
||||
.vectorize_io = true,
|
||||
.has_umad24 = true,
|
||||
.has_umul24 = true,
|
||||
.lower_uniforms_to_ubo = true
|
||||
};
|
||||
|
||||
const struct nir_shader_compiler_options r600_nir_options = {
|
||||
@@ -1224,7 +1223,6 @@ const struct nir_shader_compiler_options r600_nir_options = {
|
||||
.vectorize_io = true,
|
||||
.has_umad24 = true,
|
||||
.has_umul24 = true,
|
||||
.lower_uniforms_to_ubo = true
|
||||
};
|
||||
|
||||
|
||||
|
@@ -657,6 +657,8 @@ bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* ins
|
||||
return emit_load_scratch(instr);
|
||||
case nir_intrinsic_store_deref:
|
||||
return emit_store_deref(instr);
|
||||
case nir_intrinsic_load_uniform:
|
||||
return load_uniform(instr);
|
||||
case nir_intrinsic_discard:
|
||||
case nir_intrinsic_discard_if:
|
||||
return emit_discard_if(instr);
|
||||
@@ -868,7 +870,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
||||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
int cmp = buf_cmp + i;
|
||||
assert(cmp < 4);
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32));
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1));
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
@@ -881,7 +883,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
||||
return true;
|
||||
|
||||
} else {
|
||||
return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32);
|
||||
return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32 + 1);
|
||||
}
|
||||
} else {
|
||||
if (buf_offset) {
|
||||
@@ -915,7 +917,7 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
||||
}
|
||||
|
||||
auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
|
||||
0, bufid, bim_zero);
|
||||
1, bufid, bim_zero);
|
||||
ir->set_dest_swizzle(swz);
|
||||
|
||||
emit_instruction(ir);
|
||||
@@ -952,6 +954,46 @@ bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
|
||||
return do_emit_load_deref(var, instr);
|
||||
}
|
||||
|
||||
bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
|
||||
{
|
||||
r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
|
||||
<< *reinterpret_cast<nir_instr*>(instr)
|
||||
<< "'\n";
|
||||
|
||||
|
||||
/* If the target register is a SSA register and the loading is not
|
||||
* indirect then we can do lazy loading, i.e. the uniform value can
|
||||
* be used directly. Otherwise we have to load the data for real
|
||||
* rigt away.
|
||||
*/
|
||||
auto literal = nir_src_as_const_value(instr->src[0]);
|
||||
int base = nir_intrinsic_base(instr);
|
||||
|
||||
if (literal) {
|
||||
AluInstruction *ir = nullptr;
|
||||
|
||||
for (int i = 0; i < instr->num_components ; ++i) {
|
||||
PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
|
||||
sfn_log << SfnLog::io << "uniform "
|
||||
<< instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
|
||||
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
|
||||
u, {alu_write});
|
||||
emit_instruction(ir);
|
||||
}
|
||||
}
|
||||
if (ir)
|
||||
ir->set_flag(alu_last_instr);
|
||||
} else {
|
||||
PValue addr = from_nir(instr->src[0], 0, 0);
|
||||
return load_uniform_indirect(instr, addr, 16 * base, 0);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid)
|
||||
{
|
||||
if (!addr) {
|
||||
@@ -963,7 +1005,7 @@ bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, P
|
||||
std::array<int, 4> swz = {7,7,7,7};
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
trgt.set_reg_i(i, from_nir(instr->dest, i));
|
||||
swz[i] = i + nir_intrinsic_component(instr);
|
||||
swz[i] = i;
|
||||
}
|
||||
|
||||
if (addr->type() != Value::gpr) {
|
||||
@@ -971,7 +1013,6 @@ bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, P
|
||||
addr = trgt.reg_i(0);
|
||||
}
|
||||
|
||||
/* FIXME: buffer index and index mode are not set correctly */
|
||||
auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest,
|
||||
bufferid, PValue(), bim_none);
|
||||
ir->set_dest_swizzle(swz);
|
||||
|
@@ -166,6 +166,7 @@ private:
|
||||
|
||||
bool emit_store_deref(nir_intrinsic_instr* instr);
|
||||
|
||||
bool load_uniform(nir_intrinsic_instr* instr);
|
||||
bool process_uniforms(nir_variable *uniform);
|
||||
bool process_inputs(nir_variable *input);
|
||||
bool process_outputs(nir_variable *output);
|
||||
|
@@ -210,7 +210,7 @@ UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank):
|
||||
UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr):
|
||||
Value(Value::kconst, chan),
|
||||
m_index(sel),
|
||||
m_kcache_bank(0),
|
||||
m_kcache_bank(1),
|
||||
m_addr(addr)
|
||||
{
|
||||
|
||||
|
Reference in New Issue
Block a user