intel/fs: move lowering of non-uniform at_sample barycentric to NIR
The backend currently lowers non-uniform at_sample barycentrics with its own loop. We can do this lowering better in NIR, because divergence analysis is also available there. This change also limits VGRF usage in the backend to a single VGRF that holds the sample ID.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24716>
This commit is contained in:

committed by
Marge Bot

parent
68027bd38e
commit
74a40cc4b6
@@ -3536,66 +3536,23 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
|
||||
const glsl_interp_mode interpolation =
|
||||
(enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
|
||||
|
||||
fs_reg msg_data;
|
||||
if (nir_src_is_const(instr->src[0])) {
|
||||
unsigned msg_data = nir_src_as_uint(instr->src[0]) << 4;
|
||||
|
||||
emit_pixel_interpolater_send(bld,
|
||||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
||||
dest,
|
||||
fs_reg(), /* src */
|
||||
brw_imm_ud(msg_data),
|
||||
interpolation);
|
||||
msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4);
|
||||
} else {
|
||||
const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
|
||||
if (nir_src_is_always_uniform(instr->src[0])) {
|
||||
const fs_reg sample_id = bld.emit_uniformize(sample_src);
|
||||
const fs_reg msg_data = vgrf(glsl_type::uint_type);
|
||||
bld.exec_all().group(1, 0)
|
||||
.SHL(msg_data, sample_id, brw_imm_ud(4u));
|
||||
emit_pixel_interpolater_send(bld,
|
||||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
||||
dest,
|
||||
fs_reg(), /* src */
|
||||
component(msg_data, 0),
|
||||
interpolation);
|
||||
} else {
|
||||
/* Make a loop that sends a message to the pixel interpolater
|
||||
* for the sample number in each live channel. If there are
|
||||
* multiple channels with the same sample number then these
|
||||
* will be handled simultaneously with a single iteration of
|
||||
* the loop.
|
||||
*/
|
||||
bld.emit(BRW_OPCODE_DO);
|
||||
|
||||
/* Get the next live sample number into sample_id_reg */
|
||||
const fs_reg sample_id = bld.emit_uniformize(sample_src);
|
||||
|
||||
/* Set the flag register so that we can perform the send
|
||||
* message on all channels that have the same sample number
|
||||
*/
|
||||
bld.CMP(bld.null_reg_ud(),
|
||||
sample_src, sample_id,
|
||||
BRW_CONDITIONAL_EQ);
|
||||
const fs_reg msg_data = vgrf(glsl_type::uint_type);
|
||||
bld.exec_all().group(1, 0)
|
||||
.SHL(msg_data, sample_id, brw_imm_ud(4u));
|
||||
fs_inst *inst =
|
||||
emit_pixel_interpolater_send(bld,
|
||||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
||||
dest,
|
||||
fs_reg(), /* src */
|
||||
component(msg_data, 0),
|
||||
interpolation);
|
||||
set_predicate(BRW_PREDICATE_NORMAL, inst);
|
||||
|
||||
/* Continue the loop if there are any live channels left */
|
||||
set_predicate_inv(BRW_PREDICATE_NORMAL,
|
||||
true, /* inverse */
|
||||
bld.emit(BRW_OPCODE_WHILE));
|
||||
}
|
||||
const fs_reg sample_id = bld.emit_uniformize(sample_src);
|
||||
msg_data = component(bld.group(8, 0).vgrf(BRW_REGISTER_TYPE_UD), 0);
|
||||
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u));
|
||||
}
|
||||
|
||||
emit_pixel_interpolater_send(bld,
|
||||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
||||
dest,
|
||||
fs_reg(), /* src */
|
||||
msg_data,
|
||||
interpolation);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -1685,6 +1685,10 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
|
||||
brw_nir_optimize(nir, compiler);
|
||||
}
|
||||
|
||||
/* Do this only after the last opt_gcm. GCM will undo this lowering. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
OPT(brw_nir_lower_non_uniform_barycentric_at_sample);
|
||||
|
||||
/* Clean up LCSSA phis */
|
||||
OPT(nir_opt_remove_phis);
|
||||
|
||||
|
@@ -229,6 +229,8 @@ bool brw_nir_limit_trig_input_range_workaround(nir_shader *nir);
|
||||
|
||||
void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
|
||||
|
||||
bool brw_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir);
|
||||
|
||||
void brw_nir_apply_key(nir_shader *nir,
|
||||
const struct brw_compiler *compiler,
|
||||
const struct brw_base_prog_key *key,
|
||||
|
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright © 2023 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Lower non-uniform at-sample messages to the interpolator.
|
||||
*
|
||||
* This is pretty much identical to what nir_lower_non_uniform_access() does.
|
||||
* We do it here because otherwise GCM would undo this optimization. Also we
|
||||
* can assume divergence analysis here.
|
||||
*/
|
||||
|
||||
#include "brw_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
static bool
|
||||
brw_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder *b,
|
||||
nir_instr *instr,
|
||||
void *cb_data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample)
|
||||
return false;
|
||||
|
||||
if (nir_src_is_always_uniform(intrin->src[0]) ||
|
||||
!nir_src_is_divergent(intrin->src[0]))
|
||||
return false;
|
||||
|
||||
nir_def *sample_id = intrin->src[0].ssa;
|
||||
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_push_loop(b);
|
||||
|
||||
nir_def *first_sample_id = nir_read_first_invocation(b, sample_id);
|
||||
|
||||
nir_push_if(b, nir_ieq(b, sample_id, first_sample_id));
|
||||
|
||||
nir_builder_instr_insert(b, &intrin->instr);
|
||||
|
||||
nir_src_rewrite(&intrin->src[0], first_sample_id);
|
||||
|
||||
nir_jump(b, nir_jump_break);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir)
|
||||
{
|
||||
return nir_shader_instructions_pass(
|
||||
nir,
|
||||
brw_nir_lower_non_uniform_barycentric_at_sample_instr,
|
||||
nir_metadata_none,
|
||||
NULL);
|
||||
}
|
@@ -92,6 +92,7 @@ libintel_compiler_files = files(
|
||||
'brw_nir_lower_alpha_to_coverage.c',
|
||||
'brw_nir_lower_intersection_shader.c',
|
||||
'brw_nir_lower_non_uniform_resource_intel.c',
|
||||
'brw_nir_lower_non_uniform_barycentric_at_sample.c',
|
||||
'brw_nir_lower_ray_queries.c',
|
||||
'brw_nir_lower_rt_intrinsics.c',
|
||||
'brw_nir_lower_shader_calls.c',
|
||||
|
Reference in New Issue
Block a user