nir/lower_vars_to_scratch: calculate threshold-limited variable size separately
ir3's lowering of variables to scratch memory has to treat 8-bit values as 16-bit ones when comparing a variable's size against the given threshold, since those values are handled through 16-bit half-registers. The values can still use their natural 8-bit size and alignment when stored in scratch memory, however.

nir_lower_vars_to_scratch now accepts two size-and-alignment functions: one used to calculate the variable size that is compared against the threshold, and one used to calculate the size and alignment for laying the variable out in scratch memory. Non-ir3 users of this pass can simply pass the currently-used function twice; ir3 provides a separate variable-size function that special-cases 8-bit types.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29875>
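Below is a minimal standalone sketch, not part of this change and not mesa code (all names in it are illustrative), showing how the two callbacks can disagree for an 8-bit variable: the first size is the one compared against the threshold, the second is the one used for the scratch layout.

/* Minimal standalone sketch (hypothetical names, not the mesa API).
 * It mirrors the reasoning above: the size used for the threshold
 * comparison counts 8-bit values as 16-bit half-registers, while the
 * size used for the scratch layout keeps the natural 8-bit packing. */
#include <stdio.h>

struct toy_var {
   unsigned components;
   unsigned bit_size;
};

/* Size compared against the pass' size threshold. */
static unsigned toy_variable_size(const struct toy_var *v)
{
   /* 8-bit values occupy 16-bit half-registers on ir3. */
   unsigned bits = (v->bit_size == 8) ? 16 : v->bit_size;
   return v->components * bits / 8;
}

/* Size used to lay the variable out in scratch memory. */
static unsigned toy_scratch_layout_size(const struct toy_var *v)
{
   return v->components * v->bit_size / 8;
}

int main(void)
{
   struct toy_var v = { .components = 20, .bit_size = 8 }; /* e.g. a uint8_t[20] */
   printf("threshold size: %u bytes, scratch layout size: %u bytes\n",
          toy_variable_size(&v), toy_scratch_layout_size(&v));
   /* Prints "threshold size: 40 bytes, scratch layout size: 20 bytes". */
   return 0;
}

For this example variable the threshold comparison sees 40 bytes while only 20 bytes of scratch are reserved, which is exactly the split the two callbacks express.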
@@ -648,7 +648,7 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
     * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
     */
    NIR_PASS(progress, shader, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
-            glsl_get_natural_size_align_bytes);
+            glsl_get_natural_size_align_bytes, glsl_get_natural_size_align_bytes);
 
    /* LLVM doesn't support VGPR indexing on GFX9. */
    bool llvm_has_working_vgpr_indexing = gfx_level != GFX9;
@@ -3293,7 +3293,7 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx)
 
    /* Lower large arrays to scratch and small arrays to csel */
    NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16,
-            glsl_get_natural_size_align_bytes);
+            glsl_get_natural_size_align_bytes, glsl_get_natural_size_align_bytes);
    NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
    NIR_PASS(_, nir, nir_split_var_copies);
    NIR_PASS(_, nir, nir_lower_global_vars_to_local);
@@ -1752,6 +1752,7 @@ v3d_attempt_compile(struct v3d_compile *c)
         NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
                  nir_var_function_temp,
                  0,
+                 glsl_get_natural_size_align_bytes,
                  glsl_get_natural_size_align_bytes);
 
         NIR_PASS(_, c->s, v3d_nir_lower_global_2x32);
@@ -3685,7 +3685,7 @@ glsl_channel_type(const glsl_type *t)
    }
 }
 
-static void
+void
 glsl_size_align_handle_array_and_structs(const glsl_type *type,
                                          glsl_type_size_align_func size_align,
                                          unsigned *size, unsigned *align)
@@ -1350,6 +1350,9 @@ glsl_get_explicit_interface_type(const glsl_type *t, bool supports_std430)
    }
 }
 
+void glsl_size_align_handle_array_and_structs(const glsl_type *type,
+                                              glsl_type_size_align_func size_align,
+                                              unsigned *size, unsigned *align);
 void glsl_get_natural_size_align_bytes(const glsl_type *t, unsigned *size, unsigned *align);
 void glsl_get_vec4_size_align_bytes(const glsl_type *type, unsigned *size, unsigned *align);
 
@@ -5434,7 +5434,8 @@ bool nir_lower_io_to_temporaries(nir_shader *shader,
 bool nir_lower_vars_to_scratch(nir_shader *shader,
                                nir_variable_mode modes,
                                int size_threshold,
-                               glsl_type_size_align_func size_align);
+                               glsl_type_size_align_func variable_size_align,
+                               glsl_type_size_align_func scratch_layout_size_align);
 
 void nir_lower_clip_halfz(nir_shader *shader);
 
@@ -95,7 +95,8 @@ bool
 nir_lower_vars_to_scratch(nir_shader *shader,
                           nir_variable_mode modes,
                           int size_threshold,
-                          glsl_type_size_align_func size_align)
+                          glsl_type_size_align_func variable_size_align,
+                          glsl_type_size_align_func scratch_layout_size_align)
 {
    struct set *set = _mesa_pointer_set_create(NULL);
 
@@ -131,7 +132,7 @@ nir_lower_vars_to_scratch(nir_shader *shader,
          continue;
 
       unsigned var_size, var_align;
-      size_align(var->type, &var_size, &var_align);
+      variable_size_align(var->type, &var_size, &var_align);
       if (var_size <= size_threshold)
          continue;
 
@@ -207,13 +208,13 @@ nir_lower_vars_to_scratch(nir_shader *shader,
 
          if (var->data.location == INT_MAX) {
             unsigned var_size, var_align;
-            size_align(var->type, &var_size, &var_align);
+            scratch_layout_size_align(var->type, &var_size, &var_align);
 
             var->data.location = ALIGN_POT(shader->scratch_size, var_align);
             shader->scratch_size = var->data.location + var_size;
          }
 
-         lower_load_store(&build, intrin, size_align);
+         lower_load_store(&build, intrin, scratch_layout_size_align);
          impl_progress = true;
       }
    }
@@ -181,6 +181,31 @@ ir3_lower_bit_size(const nir_instr *instr, UNUSED void *data)
    return 0;
 }
 
+static void
+ir3_get_variable_size_align_bytes(const glsl_type *type, unsigned *size, unsigned *align)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_ARRAY:
+   case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_STRUCT:
+      glsl_size_align_handle_array_and_structs(type, ir3_get_variable_size_align_bytes,
+                                               size, align);
+      break;
+   case GLSL_TYPE_UINT8:
+   case GLSL_TYPE_INT8:
+      /* 8-bit values are handled through 16-bit half-registers, so the resulting size
+       * and alignment value has to be doubled to reflect the actual variable size
+       * requirement.
+       */
+      *size = 2 * glsl_get_components(type);
+      *align = 2;
+      break;
+   default:
+      glsl_get_natural_size_align_bytes(type, size, align);
+      break;
+   }
+}
+
 #define OPT(nir, pass, ...)                                                    \
    ({                                                                          \
       bool this_progress = false;                                              \
@@ -828,7 +853,8 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
     */
    if (so->compiler->has_pvtmem) {
       progress |= OPT(s, nir_lower_vars_to_scratch, nir_var_function_temp,
-                      16 * 16 /* bytes */, glsl_get_natural_size_align_bytes);
+                      16 * 16 /* bytes */,
+                      ir3_get_variable_size_align_bytes, glsl_get_natural_size_align_bytes);
    }
 
    /* Lower scratch writemasks */
@@ -847,6 +847,7 @@ r600_lower_and_optimize_nir(nir_shader *sh,
              nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
+             r600_get_natural_size_align_bytes,
              r600_get_natural_size_align_bytes);
 
    while (optimize_once(sh))
@@ -4937,12 +4937,12 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
     * (currently unconditional for Valhall), we force vec4 alignment for
     * scratch access.
     */
-   bool packed_tls = (gpu_id >= 0x9000);
-
+   glsl_type_size_align_func vars_to_scratch_size_align_func =
+      (gpu_id >= 0x9000) ? glsl_get_vec4_size_align_bytes
+                         : glsl_get_natural_size_align_bytes;
    /* Lower large arrays to scratch and small arrays to bcsel */
    NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
-              packed_tls ? glsl_get_vec4_size_align_bytes
-                         : glsl_get_natural_size_align_bytes);
+              vars_to_scratch_size_align_func, vars_to_scratch_size_align_func);
    NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
 
    NIR_PASS_V(nir, nir_split_var_copies);