ir3: Move fixup_regfootprint() to ir3_collect_info()

This fixes the case where fixup_regfootprint() adds to the reg footprint
but it isn't accounted for when determining whether we should double
threadsize in ir3_collect_info(). This would produce a hang on a650 and
above where we have a reg footprint of 33 and doubled threadsize.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18840>
This commit is contained in:
Connor Abbott
2022-09-26 18:27:53 +02:00
committed by Marge Bot
parent 7d1b8c8ab2
commit c58d633dd2
2 changed files with 53 additions and 61 deletions

View File

@@ -388,6 +388,59 @@ ir3_collect_info(struct ir3_shader_variant *v)
}
}
/* for vertex shader, the inputs are loaded into registers before the shader
* is executed, so max_regs from the shader instructions might not properly
* reflect the # of registers actually used, especially in the case of
* passthrough varyings.
*
* Likewise, for fragment shader, we can have some regs which are passed
* input values but never touched by the resulting shader (ie. as a result
* of dead code elimination or simply because we don't know how to turn
* the reg off).
*/
for (unsigned i = 0; i < v->inputs_count; i++) {
/* skip frag inputs fetch via bary.f since their reg's are
* not written by gpu before shader starts (and in fact the
* regid's might not even be valid)
*/
if (v->inputs[i].bary)
continue;
/* ignore high regs that are global to all threads in a warp
* (they exist by default) (a5xx+)
*/
if (v->inputs[i].regid >= regid(48, 0))
continue;
if (v->inputs[i].compmask) {
unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
int32_t regid = v->inputs[i].regid + n;
if (v->inputs[i].half) {
if (!v->mergedregs) {
v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
} else {
v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
}
} else {
v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
}
}
}
for (unsigned i = 0; i < v->num_sampler_prefetch; i++) {
unsigned n = util_last_bit(v->sampler_prefetch[i].wrmask) - 1;
int32_t regid = v->sampler_prefetch[i].dst + n;
if (v->sampler_prefetch[i].half_precision) {
if (!v->mergedregs) {
v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
} else {
v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
}
} else {
v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
}
}
/* TODO: for a5xx and below, is there a separate regfile for
* half-registers?
*/

View File

@@ -48,65 +48,6 @@ ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
return glsl_count_attribute_slots(type, false);
}
/* Raise the register high-water marks in v->info so they cover a value
 * whose last component lives at last_regid.
 *
 * A half value is counted against max_half_reg (last_regid >> 2) when
 * v->mergedregs is not set, and against max_reg (last_regid >> 3) when it
 * is.  A full value is always counted against max_reg (last_regid >> 2).
 * NOTE(review): the >> 3 presumably reflects two half regs sharing one
 * full reg in the merged register file -- confirm.
 */
static void
regfootprint_include(struct ir3_shader_variant *v, int32_t last_regid,
                     int is_half)
{
   if (is_half && !v->mergedregs) {
      v->info.max_half_reg = MAX2(v->info.max_half_reg, last_regid >> 2);
      return;
   }

   if (is_half) {
      v->info.max_reg = MAX2(v->info.max_reg, last_regid >> 3);
   } else {
      v->info.max_reg = MAX2(v->info.max_reg, last_regid >> 2);
   }
}

/* The max_reg/max_half_reg values gathered from the shader's instructions
 * can under-report the registers really in use:
 *
 *  - vertex shader inputs are loaded into registers before the shader
 *    runs, so a register may be occupied without any instruction
 *    referencing it (passthrough varyings being the typical case);
 *
 *  - a fragment shader can be handed input values in registers that the
 *    final shader never touches, either because the consumer was removed
 *    by dead code elimination or because we don't know how to turn the
 *    register off.
 *
 * Walk the variant's inputs and sampler prefetches and grow the recorded
 * footprint so that it covers those registers too.
 */
static void
fixup_regfootprint(struct ir3_shader_variant *v)
{
   for (unsigned in = 0; in < v->inputs_count; in++) {
      /* inputs fetched via bary.f are not written by the gpu before the
       * shader starts (their regid's might not even be valid), so they
       * don't contribute
       */
      if (v->inputs[in].bary) {
         continue;
      }

      /* high regs are global to all threads in a warp and exist by
       * default (a5xx+), so they are ignored as well
       */
      if (v->inputs[in].regid >= regid(48, 0)) {
         continue;
      }

      /* nothing is written for an input with an empty component mask */
      if (!v->inputs[in].compmask) {
         continue;
      }

      /* regid of the highest component that is actually written */
      int32_t last =
         v->inputs[in].regid + (util_last_bit(v->inputs[in].compmask) - 1);

      regfootprint_include(v, last, v->inputs[in].half);
   }

   for (unsigned pf = 0; pf < v->num_sampler_prefetch; pf++) {
      /* regid of the highest component the prefetch writes */
      int32_t last = v->sampler_prefetch[pf].dst +
                     (util_last_bit(v->sampler_prefetch[pf].wrmask) - 1);

      regfootprint_include(v, last, v->sampler_prefetch[pf].half_precision);
   }
}
/* wrapper for ir3_assemble() which does some info fixup based on
* shader state. Non-static since used by ir3_cmdline too.
*/
@@ -170,8 +111,6 @@ ir3_shader_assemble(struct ir3_shader_variant *v)
((v->type == MESA_SHADER_COMPUTE) ||
(v->type == MESA_SHADER_KERNEL));
fixup_regfootprint(v);
return bin;
}