radeonsi: fix unigine heaven crash when use aco on gfx8/9

Unigine Heaven crashes on GFX8/9 when using ACO:
  heaven_x64: ../../amd/mesa/src/gallium/drivers/radeonsi/si_nir_lower_abi.c:813: lower_tex: Assertion `samp_index >= 0 && comp_index >= 0' failed.

On GFX8/9 the texture comparison value is clamped in si_nir_lower_abi,
but the clamping has to be done after si_nir_lower_resource.

Fixes: ae933169 ("radeonsi: lower NIR resource srcs to descriptors last")
(cherry picked from commit 8609f49d05)

Conflicts:
        src/gallium/drivers/radeonsi/si_shader.c

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33113>
This commit is contained in:
Qiang Yu
2024-12-17 18:54:38 +08:00
committed by Dylan Baker
parent 9be97d8dec
commit 4c49b19a8c
5 changed files with 61 additions and 48 deletions

View File

@@ -772,51 +772,6 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
return true;
}
/* Texture-instruction part of the ABI lowering.
 *
 * For shadow lookups compiled with ACO on GFX8-9, the comparison value is
 * replaced by a select between its saturated and its original form, keyed on
 * a bit read from the sampler descriptor. Returns true when the instruction
 * was rewritten, false when it was left untouched.
 */
static bool lower_tex(nir_builder *b, nir_instr *instr, struct lower_abi_state *s)
{
   const struct si_shader_selector *sel = s->shader->selector;
   nir_tex_instr *tex = nir_instr_as_tex(instr);

   /* The cursor is positioned even when nothing ends up being emitted. */
   b->cursor = nir_before_instr(instr);

   /* Section 8.23.1 (Depth Texture Comparison Mode) of the
    * OpenGL 4.5 spec says:
    *
    *    "If the texture's internal format indicates a fixed-point
    *     depth texture, then D_t and D_ref are clamped to the
    *     range [0, 1]; otherwise no clamping is performed."
    *
    * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
    * so the depth comparison value isn't clamped for Z16 and
    * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
    * an explicitly clamped 32-bit float format.
    */
   const enum amd_gfx_level gfx_level = sel->screen->info.gfx_level;
   const bool in_gfx8_9_range = gfx_level >= GFX8 && gfx_level <= GFX9;

   /* The LLVM backend keeps non-uniform samplers as indices, so this NIR
    * lowering only applies when ACO is in use.
    */
   if (!tex->is_shadow || !in_gfx8_9_range || !sel->info.base.use_aco_amd)
      return false;

   int samp_index = nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle);
   int comp_index = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
   assert(samp_index >= 0 && comp_index >= 0);

   nir_def *sampler = tex->src[samp_index].src.ssa;
   nir_def *reference = tex->src[comp_index].src.ssa;

   /* Must have been lowered to descriptor. */
   assert(sampler->num_components > 1);

   /* Bit 29 of descriptor channel 3 decides whether to clamp
    * (NOTE(review): presumably the "upgraded depth" flag - confirm against
    * the sampler descriptor setup code).
    */
   nir_def *desc_word3 = nir_channel(b, sampler, 3);
   nir_def *flag_bit = nir_ubfe_imm(b, desc_word3, 29, 1);
   nir_def *needs_clamp = nir_i2b(b, flag_bit);

   nir_def *saturated = nir_fsat(b, reference);
   nir_def *selected = nir_bcsel(b, needs_clamp, saturated, reference);

   nir_src_rewrite(&tex->src[comp_index].src, selected);
   return true;
}
bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args)
{
struct lower_abi_state state = {
@@ -835,8 +790,6 @@ bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shade
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_intrinsic)
progress |= lower_intrinsic(&b, instr, &state);
else if (instr->type == nir_instr_type_tex)
progress |= lower_tex(&b, instr, &state);
}
}

View File

@@ -2242,6 +2242,55 @@ bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *sha
return shader->info.stage == MESA_SHADER_COMPUTE && shader->info.shared_size > 0 && sscreen->options.clear_lds;
}
/* Per-instruction callback: rewrites the comparator source of a shadow
 * texture instruction so that it is saturated when a bit in the sampler
 * descriptor is set, and left as-is otherwise.
 *
 * Returns true when the instruction was rewritten, false for non-texture
 * and non-shadow instructions. The state pointer is not used.
 */
static bool clamp_shadow_comparison_value(nir_builder *b, nir_instr *instr, void *state)
{
   /* Only cast once the instruction type is known to be a texture op. */
   nir_tex_instr *tex =
      instr->type == nir_instr_type_tex ? nir_instr_as_tex(instr) : NULL;

   if (!tex || !tex->is_shadow)
      return false;

   /* New instructions go right before the texture instruction. */
   b->cursor = nir_before_instr(instr);

   int samp_index = nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle);
   int comp_index = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
   assert(samp_index >= 0 && comp_index >= 0);

   nir_def *sampler = tex->src[samp_index].src.ssa;
   nir_def *reference = tex->src[comp_index].src.ssa;

   /* Must have been lowered to descriptor. */
   assert(sampler->num_components > 1);

   /* Bit 29 of descriptor channel 3 decides whether to clamp
    * (NOTE(review): presumably the "upgraded depth" flag - confirm against
    * the sampler descriptor setup code).
    */
   nir_def *desc_word3 = nir_channel(b, sampler, 3);
   nir_def *flag_bit = nir_ubfe_imm(b, desc_word3, 29, 1);
   nir_def *needs_clamp = nir_i2b(b, flag_bit);

   nir_def *saturated = nir_fsat(b, reference);
   nir_def *selected = nir_bcsel(b, needs_clamp, saturated, reference);

   nir_src_rewrite(&tex->src[comp_index].src, selected);
   return true;
}
/* Runs clamp_shadow_comparison_value() over every instruction of the shader
 * and returns whether anything was changed.
 *
 * Section 8.23.1 (Depth Texture Comparison Mode) of the
 * OpenGL 4.5 spec says:
 *
 *    "If the texture's internal format indicates a fixed-point
 *     depth texture, then D_t and D_ref are clamped to the
 *     range [0, 1]; otherwise no clamping is performed."
 *
 * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
 * so the depth comparison value isn't clamped for Z16 and
 * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
 * an explicitly clamped 32-bit float format.
 */
static bool si_nir_clamp_shadow_comparison_value(nir_shader *nir)
{
   /* No per-pass state is needed, hence the NULL callback data. */
   const bool changed =
      nir_shader_instructions_pass(nir, clamp_shadow_comparison_value,
                                   nir_metadata_control_flow, NULL);

   return changed;
}
struct nir_shader *si_get_nir_shader(struct si_shader *shader,
struct si_shader_args *args,
bool *free_nir,
@@ -2557,6 +2606,14 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
/* This must be after vectorization because it causes bindings_different_restrict() to fail. */
NIR_PASS(progress, nir, si_nir_lower_resource, shader, args);
/* The LLVM backend keeps non-uniform samplers as indices, so this can't be done in NIR for LLVM.
* Must be done after si_nir_lower_resource().
*/
if (sel->info.base.use_aco_amd && sel->info.has_shadow_comparison &&
sel->screen->info.gfx_level >= GFX8 && sel->screen->info.gfx_level <= GFX9) {
NIR_PASS(progress, nir, si_nir_clamp_shadow_comparison_value);
}
if (progress) {
si_nir_opts(sel->screen, nir, false);
progress = false;

View File

@@ -549,6 +549,7 @@ struct si_shader_info {
bool uses_sampleid;
bool uses_layer_id;
bool has_non_uniform_tex_access;
bool has_shadow_comparison;
bool uses_vmem_sampler_or_bvh;
bool uses_vmem_load_other; /* all other VMEM loads and atomics with return */

View File

@@ -443,6 +443,8 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
info->has_non_uniform_tex_access =
tex->texture_non_uniform || tex->sampler_non_uniform;
info->has_shadow_comparison |= tex->is_shadow;
} else if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name;