nir: handle arbitrary per-view outputs in nir_lower_multiview

This is needed for panvk, where multiview is "all or nothing". When
multiview is enabled, all outputs may be written with separate values
for each view.

The edge case mentioned in the previous version of `nir_can_lower_multiview`
is now handled, because we support an arbitrary number of per-view output
variables instead of expecting to find exactly one.

Signed-off-by: Benjamin Lee <benjamin.lee@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31704>
This commit is contained in:
Benjamin Lee
2024-10-09 17:58:35 -07:00
committed by Marge Bot
parent 06d3eb8e01
commit 975c3ecd1e
4 changed files with 77 additions and 49 deletions

View File

@@ -6739,9 +6739,21 @@ bool nir_lower_memory_model(nir_shader *shader);
bool nir_lower_goto_ifs(nir_shader *shader);
bool nir_lower_continue_constructs(nir_shader *shader);
typedef struct nir_lower_multiview_options {
uint32_t view_mask;
/**
* Bitfield of output locations that may be converted to a per-view array.
*
* If a variable exists in an allowed location, it will be converted to an
* array even if its value does not depend on the view index.
*/
uint64_t allowed_per_view_outputs;
} nir_lower_multiview_options;
bool nir_shader_uses_view_index(nir_shader *shader);
bool nir_can_lower_multiview(nir_shader *shader);
bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);
bool nir_can_lower_multiview(nir_shader *shader, nir_lower_multiview_options options);
bool nir_lower_multiview(nir_shader *shader, nir_lower_multiview_options options);
bool nir_lower_view_index_to_device_index(nir_shader *shader);

View File

@@ -35,7 +35,9 @@
* varyings will be the same between the different views. We put the body of
* the original vertex shader in a loop, writing to a different copy of
* gl_Position each loop iteration, and then let other optimizations clean up
* the mess.
* the mess. On some hardware it is also possible to write different copies of
* other varyings, expanding the set of shaders that the optimization is
* usable for.
*/
static bool
@@ -103,12 +105,13 @@ nir_shader_uses_view_index(nir_shader *shader)
}
static bool
shader_only_position_uses_view_index(nir_shader *shader)
shader_only_allowed_outputs_use_view_index(nir_shader *shader,
uint64_t allowed_outputs)
{
nir_shader *shader_no_position = nir_shader_clone(NULL, shader);
nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader_no_position);
/* Remove the store position from a cloned shader. */
/* Remove stores to allowed outputs from a cloned shader. */
nir_foreach_block(block, entrypoint) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
@@ -119,7 +122,7 @@ shader_only_position_uses_view_index(nir_shader *shader)
continue;
nir_variable *var = nir_intrinsic_get_var(store, 0);
if (var->data.location != VARYING_SLOT_POS)
if (!(allowed_outputs & BITFIELD64_BIT(var->data.location)))
continue;
nir_instr_remove(&store->instr);
@@ -157,22 +160,11 @@ shader_only_position_uses_view_index(nir_shader *shader)
*/
bool
nir_can_lower_multiview(nir_shader *shader)
nir_can_lower_multiview(nir_shader *shader, nir_lower_multiview_options options)
{
bool writes_position = false;
nir_foreach_shader_out_variable(var, shader) {
if (var->data.location == VARYING_SLOT_POS) {
writes_position = true;
break;
}
}
/* Don't bother handling this edge case. */
if (!writes_position)
return false;
return !shader_writes_to_memory(shader) &&
shader_only_position_uses_view_index(shader);
shader_only_allowed_outputs_use_view_index(
shader, options.allowed_per_view_outputs);
}
/**
@@ -180,28 +172,22 @@ nir_can_lower_multiview(nir_shader *shader)
*/
bool
nir_lower_multiview(nir_shader *shader, uint32_t view_mask)
nir_lower_multiview(nir_shader *shader, nir_lower_multiview_options options)
{
assert(shader->info.stage != MESA_SHADER_FRAGMENT);
int view_count = util_bitcount(view_mask);
int view_count = util_bitcount(options.view_mask);
nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
/* Update position to refer to an array. */
nir_variable *pos_var = NULL;
/* Update per-view outputs to refer to arrays. */
nir_foreach_shader_out_variable(var, shader) {
if (var->data.location == VARYING_SLOT_POS) {
assert(var->type == glsl_vec4_type());
var->type = glsl_array_type(glsl_vec4_type(), view_count, 0);
if (options.allowed_per_view_outputs & BITFIELD64_BIT(var->data.location)) {
var->type = glsl_array_type(var->type, view_count, 0);
var->data.per_view = true;
shader->info.per_view_outputs |= VARYING_BIT_POS;
pos_var = var;
break;
shader->info.per_view_outputs |= BITFIELD64_BIT(var->data.location);
}
}
assert(pos_var);
nir_cf_list body;
nir_cf_list_extract(&body, &entrypoint->body);
@@ -222,7 +208,7 @@ nir_lower_multiview(nir_shader *shader, uint32_t view_mask)
nir_deref_instr *view_index_deref = nir_build_deref_var(&b, view_index_var);
{
int array_position = 0;
uint32_t view_mask_temp = view_mask;
uint32_t view_mask_temp = options.view_mask;
while (view_mask_temp) {
uint32_t view_index = u_bit_scan(&view_mask_temp);
nir_store_deref(&b, nir_build_deref_array_imm(&b, view_index_deref, array_position),
@@ -238,13 +224,16 @@ nir_lower_multiview(nir_shader *shader, uint32_t view_mask)
* break
*
* view_index = active_indices[loop_index]
* pos_deref = &pos[loop_index]
*
* out1_deref = &out1[loop_index]
* out2_deref = &out2[loop_index]
* ...
*
* # Placeholder for the body to be reinserted.
*
* loop_index += 1
*
* Later both `view_index` and `pos_deref` will be used to rewrite the
* Later both `view_index` and `outN_deref` will be used to rewrite the
* original shader body.
*/
@@ -258,14 +247,23 @@ nir_lower_multiview(nir_shader *shader, uint32_t view_mask)
nir_def *view_index =
nir_load_deref(&b, nir_build_deref_array(&b, view_index_deref, loop_index));
nir_deref_instr *pos_deref =
nir_build_deref_array(&b, nir_build_deref_var(&b, pos_var), loop_index);
struct hash_table *out_derefs = _mesa_pointer_hash_table_create(NULL);
nir_cursor body_cursor = b.cursor;
nir_foreach_shader_out_variable(var, shader) {
if (var->data.per_view) {
nir_deref_instr *deref =
nir_build_deref_array(&b, nir_build_deref_var(&b, var), loop_index);
_mesa_hash_table_insert(out_derefs, var, (void *)deref);
body_cursor = nir_after_instr(&deref->instr);
}
}
nir_store_deref(&b, loop_index_deref, nir_iadd_imm(&b, loop_index, 1), 1);
nir_pop_loop(&b, loop);
/* Reinsert the body. */
b.cursor = nir_after_instr(&pos_deref->instr);
b.cursor = body_cursor;
nir_cf_reinsert(&body, b.cursor);
nir_foreach_block(block, entrypoint) {
@@ -283,10 +281,12 @@ nir_lower_multiview(nir_shader *shader, uint32_t view_mask)
case nir_intrinsic_store_deref: {
nir_variable *var = nir_intrinsic_get_var(intrin, 0);
if (var == pos_var) {
struct hash_entry *entry = _mesa_hash_table_search(out_derefs, var);
if (entry) {
nir_deref_instr *new_deref = entry->data;
nir_deref_instr *old_deref = nir_src_as_deref(intrin->src[0]);
nir_src_rewrite(&intrin->src[0], &pos_deref->def);
nir_src_rewrite(&intrin->src[0], &new_deref->def);
/* Remove old deref since it has the wrong type. */
nir_deref_instr_remove_if_unused(old_deref);
@@ -294,12 +294,14 @@ nir_lower_multiview(nir_shader *shader, uint32_t view_mask)
break;
}
case nir_intrinsic_load_deref:
if (nir_intrinsic_get_var(intrin, 0) == pos_var) {
case nir_intrinsic_load_deref: {
nir_variable *var = nir_intrinsic_get_var(intrin, 0);
if (_mesa_hash_table_search(out_derefs, var)) {
unreachable("Should have lowered I/O to temporaries "
"so no load_deref on position output is expected.");
"so no load_deref on output is expected.");
}
break;
}
case nir_intrinsic_copy_deref:
unreachable("Should have lowered copy_derefs at this point");
@@ -312,6 +314,8 @@ nir_lower_multiview(nir_shader *shader, uint32_t view_mask)
}
}
_mesa_hash_table_destroy(out_derefs, NULL);
nir_metadata_preserve(entrypoint, nir_metadata_none);
return true;
}

View File

@@ -74,9 +74,13 @@ bool
tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, struct tu_device *dev)
{
bool progress = false;
nir_lower_multiview_options options = {
.view_mask = mask,
.allowed_per_view_outputs = VARYING_BIT_POS
};
if (!dev->physical_device->info->a6xx.supports_multiview_mask)
NIR_PASS(progress, nir, lower_multiview_mask, &mask);
NIR_PASS(progress, nir, lower_multiview_mask, &options.view_mask);
unsigned num_views = util_logbase2(mask) + 1;
@@ -98,13 +102,13 @@ tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, struct tu_device *dev)
*/
if (!TU_DEBUG(NOMULTIPOS) &&
num_views <= max_views_for_multipos && num_outputs + (num_views - 1) <= 32 &&
nir_can_lower_multiview(nir)) {
nir_can_lower_multiview(nir, options)) {
/* It appears that the multiview mask is ignored when multi-position
* output is enabled, so we have to write 0 to inactive views ourselves.
*/
NIR_PASS(progress, nir, lower_multiview_mask, &mask);
NIR_PASS(progress, nir, lower_multiview_mask, &options.view_mask);
NIR_PASS_V(nir, nir_lower_multiview, mask);
NIR_PASS_V(nir, nir_lower_multiview, options);
progress = true;
}

View File

@@ -204,7 +204,11 @@ anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask,
* implement multiview.
*/
if (use_primitive_replication) {
bool progress = nir_lower_multiview(shader, view_mask);
nir_lower_multiview_options options = {
.view_mask = view_mask,
.allowed_per_view_outputs = VARYING_BIT_POS
};
bool progress = nir_lower_multiview(shader, options);
if (progress) {
nir_builder b = nir_builder_at(nir_before_impl(entrypoint));
@@ -339,5 +343,9 @@ anv_check_for_primitive_replication(struct anv_device *device,
if (view_count == 1 || view_count > primitive_replication_max_views)
return false;
return nir_can_lower_multiview(shaders[MESA_SHADER_VERTEX]);
nir_lower_multiview_options options = {
.view_mask = view_mask,
.allowed_per_view_outputs = VARYING_BIT_POS
};
return nir_can_lower_multiview(shaders[MESA_SHADER_VERTEX], options);
}