intel/fs: move lowering of non-uniform at_sample barycentric to NIR

We use a non-uniform lowering loop in the backend which we can do
better in NIR because we can also use divergence analysis there.

This change also limits VGRF usage to a single VGRF to hold the sample
ID in the backend.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24716>
This commit is contained in:
Lionel Landwerlin
2023-08-24 01:23:00 +03:00
committed by Marge Bot
parent 68027bd38e
commit 74a40cc4b6
5 changed files with 97 additions and 55 deletions

View File

@@ -3536,66 +3536,23 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
const glsl_interp_mode interpolation =
(enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
fs_reg msg_data;
if (nir_src_is_const(instr->src[0])) {
unsigned msg_data = nir_src_as_uint(instr->src[0]) << 4;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dest,
fs_reg(), /* src */
brw_imm_ud(msg_data),
interpolation);
msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4);
} else {
const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_UD);
if (nir_src_is_always_uniform(instr->src[0])) {
const fs_reg sample_id = bld.emit_uniformize(sample_src);
const fs_reg msg_data = vgrf(glsl_type::uint_type);
bld.exec_all().group(1, 0)
.SHL(msg_data, sample_id, brw_imm_ud(4u));
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dest,
fs_reg(), /* src */
component(msg_data, 0),
interpolation);
} else {
/* Make a loop that sends a message to the pixel interpolater
* for the sample number in each live channel. If there are
* multiple channels with the same sample number then these
* will be handled simultaneously with a single iteration of
* the loop.
*/
bld.emit(BRW_OPCODE_DO);
/* Get the next live sample number into sample_id_reg */
const fs_reg sample_id = bld.emit_uniformize(sample_src);
/* Set the flag register so that we can perform the send
* message on all channels that have the same sample number
*/
bld.CMP(bld.null_reg_ud(),
sample_src, sample_id,
BRW_CONDITIONAL_EQ);
const fs_reg msg_data = vgrf(glsl_type::uint_type);
bld.exec_all().group(1, 0)
.SHL(msg_data, sample_id, brw_imm_ud(4u));
fs_inst *inst =
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dest,
fs_reg(), /* src */
component(msg_data, 0),
interpolation);
set_predicate(BRW_PREDICATE_NORMAL, inst);
/* Continue the loop if there are any live channels left */
set_predicate_inv(BRW_PREDICATE_NORMAL,
true, /* inverse */
bld.emit(BRW_OPCODE_WHILE));
}
const fs_reg sample_id = bld.emit_uniformize(sample_src);
msg_data = component(bld.group(8, 0).vgrf(BRW_REGISTER_TYPE_UD), 0);
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u));
}
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dest,
fs_reg(), /* src */
msg_data,
interpolation);
break;
}

View File

@@ -1685,6 +1685,10 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
brw_nir_optimize(nir, compiler);
}
/* Do this only after the last opt_gcm. GCM will undo this lowering. */
if (nir->info.stage == MESA_SHADER_FRAGMENT)
OPT(brw_nir_lower_non_uniform_barycentric_at_sample);
/* Clean up LCSSA phis */
OPT(nir_opt_remove_phis);

View File

@@ -229,6 +229,8 @@ bool brw_nir_limit_trig_input_range_workaround(nir_shader *nir);
void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
bool brw_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir);
void brw_nir_apply_key(nir_shader *nir,
const struct brw_compiler *compiler,
const struct brw_base_prog_key *key,

View File

@@ -0,0 +1,78 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/*
 * Lower non-uniform at_sample messages to the pixel interpolator.
 *
 * This is pretty much identical to what nir_lower_non_uniform_access() does.
 * It is done in its own pass because GCM would otherwise undo this lowering,
 * so it has to run after the last GCM pass. Running here also lets the pass
 * assume that divergence analysis information is available.
 */
#include "brw_nir.h"
#include "compiler/nir/nir_builder.h"
/**
 * Per-instruction callback handed to nir_shader_instructions_pass().
 *
 * Only load_barycentric_at_sample intrinsics whose sample ID source
 * (src[0]) is divergent and not always-uniform are touched. Such an
 * intrinsic is taken out of its block and re-inserted inside a loop,
 * guarded by a comparison of the original sample ID against the value read
 * from the first invocation; its sample ID source is then replaced by that
 * first-invocation value, and the guarded branch ends with a break.
 *
 * \param b        builder used to emit the loop, comparison and break
 * \param instr    instruction currently visited by the pass
 * \param cb_data  not used by this callback
 *
 * \return true when the instruction was lowered, false when left untouched
 */
static bool
brw_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder *b,
                                                      nir_instr *instr,
                                                      void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *at_sample = nir_instr_as_intrinsic(instr);
   if (at_sample->intrinsic != nir_intrinsic_load_barycentric_at_sample)
      return false;

   /* Nothing to do unless the sample ID really differs between channels. */
   nir_src *sample_src = &at_sample->src[0];
   const bool needs_loop = !nir_src_is_always_uniform(*sample_src) &&
                           nir_src_is_divergent(*sample_src);
   if (!needs_loop)
      return false;

   /* Keep hold of the original (divergent) value before rewriting. */
   nir_def *divergent_id = sample_src->ssa;

   /* Detach the intrinsic and keep building from the cursor that
    * nir_instr_remove() hands back.
    */
   b->cursor = nir_instr_remove(&at_sample->instr);

   /* NOTE(review): neither the loop nor the if is popped before returning;
    * presumably fine because the builder cursor is not reused by the
    * caller — confirm.
    */
   nir_push_loop(b);
   {
      nir_def *uniform_id = nir_read_first_invocation(b, divergent_id);
      nir_def *same_id = nir_ieq(b, divergent_id, uniform_id);

      nir_push_if(b, same_id);
      {
         /* Re-insert first, then swap the source: the rewrite is done on
          * an instruction that is back in the shader.
          */
         nir_builder_instr_insert(b, &at_sample->instr);
         nir_src_rewrite(sample_src, uniform_id);

         nir_jump(b, nir_jump_break);
      }
   }

   return true;
}
/**
 * Shader-level entry point of the pass.
 *
 * Hands brw_nir_lower_non_uniform_barycentric_at_sample_instr() to
 * nir_shader_instructions_pass() with no callback data, declaring that no
 * metadata is preserved (nir_metadata_none).
 *
 * \param nir  shader to process
 *
 * \return the result of nir_shader_instructions_pass()
 */
bool
brw_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir)
{
   const bool progress =
      nir_shader_instructions_pass(nir,
                                   brw_nir_lower_non_uniform_barycentric_at_sample_instr,
                                   nir_metadata_none,
                                   NULL);

   return progress;
}

View File

@@ -92,6 +92,7 @@ libintel_compiler_files = files(
'brw_nir_lower_alpha_to_coverage.c',
'brw_nir_lower_intersection_shader.c',
'brw_nir_lower_non_uniform_resource_intel.c',
'brw_nir_lower_non_uniform_barycentric_at_sample.c',
'brw_nir_lower_ray_queries.c',
'brw_nir_lower_rt_intrinsics.c',
'brw_nir_lower_shader_calls.c',