nir/lower_vars_to_scratch: calculate threshold-limited variable size separately

ir3's lowering of variables to scratch memory has to treat 8-bit values as
16-bit ones when comparing such values' sizes against the given threshold,
since those values are handled through 16-bit half-registers. But those
values can still use their natural 8-bit size and alignment when stored in
scratch memory.

nir_lower_vars_to_scratch now accepts two size-and-alignment functions,
one used for calculating the variable size and the other for calculating
the size and alignment needed for storing inside scratch memory. Non-ir3
uses of this pass can just duplicate the currently-used function. ir3
provides a separate variable-size function that special-cases 8-bit types.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29875>
This commit is contained in:
Zan Dobersek
2024-07-14 08:59:27 +02:00
committed by Marge Bot
parent f8602612ed
commit 7fd5f76393
10 changed files with 46 additions and 13 deletions

View File

@@ -648,7 +648,7 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
* scratch to alloca's, assuming LLVM won't generate VGPR indexing.
*/
NIR_PASS(progress, shader, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
glsl_get_natural_size_align_bytes);
glsl_get_natural_size_align_bytes, glsl_get_natural_size_align_bytes);
/* LLVM doesn't support VGPR indexing on GFX9. */
bool llvm_has_working_vgpr_indexing = gfx_level != GFX9;

View File

@@ -3293,7 +3293,7 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx)
/* Lower large arrays to scratch and small arrays to csel */
NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16,
glsl_get_natural_size_align_bytes);
glsl_get_natural_size_align_bytes, glsl_get_natural_size_align_bytes);
NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
NIR_PASS(_, nir, nir_split_var_copies);
NIR_PASS(_, nir, nir_lower_global_vars_to_local);

View File

@@ -1752,6 +1752,7 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
nir_var_function_temp,
0,
glsl_get_natural_size_align_bytes,
glsl_get_natural_size_align_bytes);
NIR_PASS(_, c->s, v3d_nir_lower_global_2x32);

View File

@@ -3685,7 +3685,7 @@ glsl_channel_type(const glsl_type *t)
}
}
static void
void
glsl_size_align_handle_array_and_structs(const glsl_type *type,
glsl_type_size_align_func size_align,
unsigned *size, unsigned *align)

View File

@@ -1350,6 +1350,9 @@ glsl_get_explicit_interface_type(const glsl_type *t, bool supports_std430)
}
}
void glsl_size_align_handle_array_and_structs(const glsl_type *type,
glsl_type_size_align_func size_align,
unsigned *size, unsigned *align);
void glsl_get_natural_size_align_bytes(const glsl_type *t, unsigned *size, unsigned *align);
void glsl_get_vec4_size_align_bytes(const glsl_type *type, unsigned *size, unsigned *align);

View File

@@ -5434,7 +5434,8 @@ bool nir_lower_io_to_temporaries(nir_shader *shader,
bool nir_lower_vars_to_scratch(nir_shader *shader,
nir_variable_mode modes,
int size_threshold,
glsl_type_size_align_func size_align);
glsl_type_size_align_func variable_size_align,
glsl_type_size_align_func scratch_layout_size_align);
void nir_lower_clip_halfz(nir_shader *shader);

View File

@@ -95,7 +95,8 @@ bool
nir_lower_vars_to_scratch(nir_shader *shader,
nir_variable_mode modes,
int size_threshold,
glsl_type_size_align_func size_align)
glsl_type_size_align_func variable_size_align,
glsl_type_size_align_func scratch_layout_size_align)
{
struct set *set = _mesa_pointer_set_create(NULL);
@@ -131,7 +132,7 @@ nir_lower_vars_to_scratch(nir_shader *shader,
continue;
unsigned var_size, var_align;
size_align(var->type, &var_size, &var_align);
variable_size_align(var->type, &var_size, &var_align);
if (var_size <= size_threshold)
continue;
@@ -207,13 +208,13 @@ nir_lower_vars_to_scratch(nir_shader *shader,
if (var->data.location == INT_MAX) {
unsigned var_size, var_align;
size_align(var->type, &var_size, &var_align);
scratch_layout_size_align(var->type, &var_size, &var_align);
var->data.location = ALIGN_POT(shader->scratch_size, var_align);
shader->scratch_size = var->data.location + var_size;
}
lower_load_store(&build, intrin, size_align);
lower_load_store(&build, intrin, scratch_layout_size_align);
impl_progress = true;
}
}

View File

@@ -181,6 +181,31 @@ ir3_lower_bit_size(const nir_instr *instr, UNUSED void *data)
return 0;
}
/* Size/align callback for nir_lower_vars_to_scratch's threshold comparison.
 * Mirrors glsl_get_natural_size_align_bytes except that 8-bit scalars are
 * counted at twice their natural size, because ir3 stores them in 16-bit
 * half-registers.
 */
static void
ir3_get_variable_size_align_bytes(const glsl_type *type, unsigned *size, unsigned *align)
{
   /* Aggregates: recurse so every member is measured with this function. */
   if (type->base_type == GLSL_TYPE_ARRAY ||
       type->base_type == GLSL_TYPE_INTERFACE ||
       type->base_type == GLSL_TYPE_STRUCT) {
      glsl_size_align_handle_array_and_structs(type, ir3_get_variable_size_align_bytes,
                                               size, align);
      return;
   }

   /* 8-bit values are handled through 16-bit half-registers, so the resulting
    * size and alignment value has to be doubled to reflect the actual variable
    * size requirement.
    */
   if (type->base_type == GLSL_TYPE_UINT8 || type->base_type == GLSL_TYPE_INT8) {
      *size = 2 * glsl_get_components(type);
      *align = 2;
      return;
   }

   /* Everything else keeps its natural size and alignment. */
   glsl_get_natural_size_align_bytes(type, size, align);
}
#define OPT(nir, pass, ...) \
({ \
bool this_progress = false; \
@@ -828,7 +853,8 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
*/
if (so->compiler->has_pvtmem) {
progress |= OPT(s, nir_lower_vars_to_scratch, nir_var_function_temp,
16 * 16 /* bytes */, glsl_get_natural_size_align_bytes);
16 * 16 /* bytes */,
ir3_get_variable_size_align_bytes, glsl_get_natural_size_align_bytes);
}
/* Lower scratch writemasks */

View File

@@ -847,6 +847,7 @@ r600_lower_and_optimize_nir(nir_shader *sh,
nir_lower_vars_to_scratch,
nir_var_function_temp,
40,
r600_get_natural_size_align_bytes,
r600_get_natural_size_align_bytes);
while (optimize_once(sh))

View File

@@ -4937,12 +4937,12 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
* (currently unconditional for Valhall), we force vec4 alignment for
* scratch access.
*/
bool packed_tls = (gpu_id >= 0x9000);
glsl_type_size_align_func vars_to_scratch_size_align_func =
(gpu_id >= 0x9000) ? glsl_get_vec4_size_align_bytes
: glsl_get_natural_size_align_bytes;
/* Lower large arrays to scratch and small arrays to bcsel */
NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
packed_tls ? glsl_get_vec4_size_align_bytes
: glsl_get_natural_size_align_bytes);
vars_to_scratch_size_align_func, vars_to_scratch_size_align_func);
NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
NIR_PASS_V(nir, nir_split_var_copies);