r600/sfn: Use load_ubo_vec4 lowering pass
This replaces the backend-specific pass that aligned UBO loads at 16 byte boundaries with the common nir_lower_ubo_vec4 lowering pass.

v2: use nir functions to query constants in ubo_vec4 (Eric)

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6743>
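Both the removed r600-specific pass and the shared nir_lower_ubo_vec4 pass rewrite a byte-addressed UBO load into a load of a whole 16-byte slot plus a starting-component selection. The following is a rough host-side sketch of that addressing math, mirroring the offset >> 4 and (offset >> 2) & 3 computations visible in the removed code below; the struct and function names are illustrative only, not NIR API.

#include <cstdio>

/* Illustrative only: split a byte offset into the 16-byte (vec4) slot and
 * the first 32-bit component inside that slot, as the lowering does. */
struct ubo_vec4_addr {
   unsigned slot;        /* index of the 16-byte slot in the UBO */
   unsigned first_comp;  /* first dword component within the slot, 0..3 */
};

static ubo_vec4_addr split_ubo_offset(unsigned byte_offset)
{
   return { byte_offset >> 4, (byte_offset >> 2) & 3u };
}

int main()
{
   /* A vec2 load at byte offset 24 reads slot 1, components 2 and 3. */
   ubo_vec4_addr a = split_ubo_offset(24);
   std::printf("slot %u, first component %u\n", a.slot, a.first_comp);
   return 0;
}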
@@ -390,100 +390,6 @@ bool r600_lower_scratch_addresses(nir_shader *shader)
   return progress;
}

static nir_ssa_def *
r600_lower_ubo_to_align16_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   assert(op->intrinsic == nir_intrinsic_load_ubo);

   bool const_address = (nir_src_is_const(op->src[1]) && nir_src_is_const(op->src[0]));

   nir_ssa_def *offset = op->src[1].ssa;

   /* This is ugly: With const addressing we can actually set a proper fetch target mask,
    * but for this we need the component encoded, we don't shift and do the decoding in the
    * backend. Otherwise we shift by four and resolve the component here
    * (TODO: encode the start component in the intrinsic when the offset base is non-constant
    * but a multiple of 16 */

   nir_ssa_def *new_offset = offset;
   if (!const_address)
      new_offset = nir_ishr(b, offset, nir_imm_int(b, 4));

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo_r600);
   load->num_components = const_address ? op->num_components : 4;
   load->src[0] = op->src[0];
   load->src[1] = nir_src_for_ssa(new_offset);
   nir_intrinsic_set_align(load, nir_intrinsic_align_mul(op), nir_intrinsic_align_offset(op));

   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   nir_builder_instr_insert(b, &load->instr);

   /* when four components are loaded or both the offset and the location
    * are constant, then the backend can deal with it better */
   if (op->num_components == 4 || const_address)
      return &load->dest.ssa;

   /* What comes below is a performance disaster when the offset is not constant
    * because then we have to assume that any component can be the first one and we
    * have to pick the result manually. */
   nir_ssa_def *first_comp = nir_iand(b, nir_ishr(b, offset, nir_imm_int(b, 2)),
                                      nir_imm_int(b,3));

   const unsigned swz_000[4] = {0, 0, 0};
   nir_ssa_def *component_select = nir_ieq(b, r600_imm_ivec3(b, 0, 1, 2),
                                           nir_swizzle(b, first_comp, swz_000, 3));

   if (op->num_components == 1) {
      nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
                                      nir_channel(b, &load->dest.ssa, 0),
                                      nir_channel(b, &load->dest.ssa, 3));
      nir_ssa_def *check1 = nir_bcsel(b, nir_channel(b, component_select, 1),
                                      nir_channel(b, &load->dest.ssa, 1),
                                      check0);
      return nir_bcsel(b, nir_channel(b, component_select, 2),
                       nir_channel(b, &load->dest.ssa, 2),
                       check1);
   } else if (op->num_components == 2) {
      const unsigned szw_01[2] = {0, 1};
      const unsigned szw_12[2] = {1, 2};
      const unsigned szw_23[2] = {2, 3};

      nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
                                      nir_swizzle(b, &load->dest.ssa, szw_01, 2),
                                      nir_swizzle(b, &load->dest.ssa, szw_23, 2));
      return nir_bcsel(b, nir_channel(b, component_select, 1),
                       nir_swizzle(b, &load->dest.ssa, szw_12, 2),
                       check0);
   } else {
      const unsigned szw_012[3] = {0, 1, 2};
      const unsigned szw_123[3] = {1, 2, 3};
      return nir_bcsel(b, nir_channel(b, component_select, 0),
                       nir_swizzle(b, &load->dest.ssa, szw_012, 3),
                       nir_swizzle(b, &load->dest.ssa, szw_123, 3));
   }
}

bool r600_lower_ubo_to_align16_filter(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   return op->intrinsic == nir_intrinsic_load_ubo;
}


bool r600_lower_ubo_to_align16(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_ubo_to_align16_filter,
                                        r600_lower_ubo_to_align16_impl,
                                        nullptr);
}

static void
insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
{
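The "performance disaster" comment in the removed pass above refers to the chain of compares and selects that had to be emitted whenever the offset is not constant: every load of fewer than four components then pays for a per-channel pick at shader run time. Below is a host-side sketch of what that bcsel chain computes for a one-component load; this is plain C++ for illustration, not NIR, and loaded stands in for the four channels returned by load_ubo_r600.

#include <cstdio>

/* Models the select chain emitted for a scalar load with a non-constant
 * offset: first_comp = (offset >> 2) & 3 decides which loaded channel is
 * the requested value. */
static float pick_scalar(const float loaded[4], unsigned byte_offset)
{
   unsigned first_comp = (byte_offset >> 2) & 3u;
   float check0 = (first_comp == 0) ? loaded[0] : loaded[3];
   float check1 = (first_comp == 1) ? loaded[1] : check0;
   return (first_comp == 2) ? loaded[2] : check1;
}

int main()
{
   const float loaded[4] = {10.f, 11.f, 12.f, 13.f};
   /* Byte offset 24 -> first_comp == 2 -> picks loaded[2]. */
   std::printf("%g\n", pick_scalar(loaded, 24));
   return 0;
}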
@@ -875,10 +781,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sel->nir->functions));
   assert(func->impl->registers.length() == 0 && !has_saturate(func));

   if (true) {
      optimize_once(sel->nir);
      NIR_PASS_V(sel->nir, r600_lower_ubo_to_align16);
   }
   NIR_PASS_V(sel->nir, nir_lower_ubo_vec4);

   /* It seems the output of this optimization is cached somewhere, and
    * when there are registers, then we can no longer copy propagate, so
    * skip the optimization then. (There is probably a better way, but yeah)