intel/fs: move lower of non-uniform at_sample barycentric to NIR
The backend currently handles non-uniform at_sample barycentrics with its own lowering loop. We can do this better in NIR, because divergence analysis is also available there. This change also limits VGRF usage in the backend to a single VGRF holding the sample ID.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24716>
This commit is contained in:

committed by
Marge Bot

parent
68027bd38e
commit
74a40cc4b6
@@ -3536,66 +3536,23 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
|
|||||||
const glsl_interp_mode interpolation =
|
const glsl_interp_mode interpolation =
|
||||||
(enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
|
(enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
|
||||||
|
|
||||||
|
fs_reg msg_data;
|
||||||
if (nir_src_is_const(instr->src[0])) {
|
if (nir_src_is_const(instr->src[0])) {
|
||||||
unsigned msg_data = nir_src_as_uint(instr->src[0]) << 4;
|
msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4);
|
||||||
|
|
||||||
emit_pixel_interpolater_send(bld,
|
|
||||||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
|
||||||
dest,
|
|
||||||
fs_reg(), /* src */
|
|
||||||
brw_imm_ud(msg_data),
|
|
||||||
interpolation);
|
|
||||||
} else {
|
} else {
|
||||||
const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
|
const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
|
||||||
BRW_REGISTER_TYPE_UD);
|
BRW_REGISTER_TYPE_UD);
|
||||||
|
const fs_reg sample_id = bld.emit_uniformize(sample_src);
|
||||||
if (nir_src_is_always_uniform(instr->src[0])) {
|
msg_data = component(bld.group(8, 0).vgrf(BRW_REGISTER_TYPE_UD), 0);
|
||||||
const fs_reg sample_id = bld.emit_uniformize(sample_src);
|
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u));
|
||||||
const fs_reg msg_data = vgrf(glsl_type::uint_type);
|
|
||||||
bld.exec_all().group(1, 0)
|
|
||||||
.SHL(msg_data, sample_id, brw_imm_ud(4u));
|
|
||||||
emit_pixel_interpolater_send(bld,
|
|
||||||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
|
||||||
dest,
|
|
||||||
fs_reg(), /* src */
|
|
||||||
component(msg_data, 0),
|
|
||||||
interpolation);
|
|
||||||
} else {
|
|
||||||
/* Make a loop that sends a message to the pixel interpolater
|
|
||||||
* for the sample number in each live channel. If there are
|
|
||||||
* multiple channels with the same sample number then these
|
|
||||||
* will be handled simultaneously with a single iteration of
|
|
||||||
* the loop.
|
|
||||||
*/
|
|
||||||
bld.emit(BRW_OPCODE_DO);
|
|
||||||
|
|
||||||
/* Get the next live sample number into sample_id_reg */
|
|
||||||
const fs_reg sample_id = bld.emit_uniformize(sample_src);
|
|
||||||
|
|
||||||
/* Set the flag register so that we can perform the send
|
|
||||||
* message on all channels that have the same sample number
|
|
||||||
*/
|
|
||||||
bld.CMP(bld.null_reg_ud(),
|
|
||||||
sample_src, sample_id,
|
|
||||||
BRW_CONDITIONAL_EQ);
|
|
||||||
const fs_reg msg_data = vgrf(glsl_type::uint_type);
|
|
||||||
bld.exec_all().group(1, 0)
|
|
||||||
.SHL(msg_data, sample_id, brw_imm_ud(4u));
|
|
||||||
fs_inst *inst =
|
|
||||||
emit_pixel_interpolater_send(bld,
|
|
||||||
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
|
||||||
dest,
|
|
||||||
fs_reg(), /* src */
|
|
||||||
component(msg_data, 0),
|
|
||||||
interpolation);
|
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, inst);
|
|
||||||
|
|
||||||
/* Continue the loop if there are any live channels left */
|
|
||||||
set_predicate_inv(BRW_PREDICATE_NORMAL,
|
|
||||||
true, /* inverse */
|
|
||||||
bld.emit(BRW_OPCODE_WHILE));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
emit_pixel_interpolater_send(bld,
|
||||||
|
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
|
||||||
|
dest,
|
||||||
|
fs_reg(), /* src */
|
||||||
|
msg_data,
|
||||||
|
interpolation);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1685,6 +1685,10 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
|
|||||||
brw_nir_optimize(nir, compiler);
|
brw_nir_optimize(nir, compiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Do this only after the last opt_gcm. GCM will undo this lowering. */
|
||||||
|
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||||
|
OPT(brw_nir_lower_non_uniform_barycentric_at_sample);
|
||||||
|
|
||||||
/* Clean up LCSSA phis */
|
/* Clean up LCSSA phis */
|
||||||
OPT(nir_opt_remove_phis);
|
OPT(nir_opt_remove_phis);
|
||||||
|
|
||||||
|
@@ -229,6 +229,8 @@ bool brw_nir_limit_trig_input_range_workaround(nir_shader *nir);
|
|||||||
|
|
||||||
void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
|
void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
|
||||||
|
|
||||||
|
bool brw_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir);
|
||||||
|
|
||||||
void brw_nir_apply_key(nir_shader *nir,
|
void brw_nir_apply_key(nir_shader *nir,
|
||||||
const struct brw_compiler *compiler,
|
const struct brw_compiler *compiler,
|
||||||
const struct brw_base_prog_key *key,
|
const struct brw_base_prog_key *key,
|
||||||
|
@@ -0,0 +1,78 @@
|
|||||||
|
/*
|
||||||
|
* Copyright © 2023 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Lower non-uniform at_sample messages to the pixel interpolator.
|
||||||
|
*
|
||||||
|
* This is pretty much identical to what nir_lower_non_uniform_access() does.
|
||||||
|
* We do it here because otherwise GCM would undo this optimization. Also we
|
||||||
|
* can assume divergence analysis here.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "brw_nir.h"
|
||||||
|
#include "compiler/nir/nir_builder.h"
|
||||||
|
|
||||||
|
/*
 * Per-instruction callback handed to nir_shader_instructions_pass().
 *
 * Rewrites a load_barycentric_at_sample intrinsic whose sample-id source
 * (src[0]) is divergent so that it executes inside a loop, using a sample id
 * read from the first invocation as its source.
 *
 * b       - builder used to emit the new control flow and instructions.
 * instr   - instruction being visited.
 * cb_data - unused by this callback.
 *
 * Returns false (instruction untouched) when instr is not an intrinsic, is
 * not load_barycentric_at_sample, or when src[0] is reported as always
 * uniform or as not divergent. Returns true when the instruction was
 * rewritten.
 *
 * NOTE(review): relies on divergence information being valid for src[0];
 * this function does not compute it itself - confirm the caller guarantees
 * that.
 */
static bool
|
||||||
|
brw_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder *b,
|
||||||
|
nir_instr *instr,
|
||||||
|
void *cb_data)
|
||||||
|
{
|
||||||
|
/* Only intrinsics are of interest. */
if (instr->type != nir_instr_type_intrinsic)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
|
if (intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Nothing to lower unless the sample id is divergent. */
if (nir_src_is_always_uniform(intrin->src[0]) ||
|
||||||
|
!nir_src_is_divergent(intrin->src[0]))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Keep the original (per-invocation) sample id for the comparison below. */
nir_def *sample_id = intrin->src[0].ssa;
|
||||||
|
|
||||||
|
/* Take the intrinsic out of the shader; new code is emitted at the cursor
 * returned by the removal, and the intrinsic is re-inserted further down.
 */
b->cursor = nir_instr_remove(&intrin->instr);
|
||||||
|
|
||||||
|
nir_push_loop(b);
|
||||||
|
|
||||||
|
/* Sample id as seen by the first invocation (going by the helper's name). */
nir_def *first_sample_id = nir_read_first_invocation(b, sample_id);
|
||||||
|
|
||||||
|
/* Presumably selects every invocation sharing that sample id, so they are
 * all handled in the same loop iteration - TODO confirm.
 */
nir_push_if(b, nir_ieq(b, sample_id, first_sample_id));
|
||||||
|
|
||||||
|
/* Re-insert the original intrinsic inside the if. */
nir_builder_instr_insert(b, &intrin->instr);
|
||||||
|
|
||||||
|
/* Make the intrinsic consume the first-invocation value instead of the
 * divergent one.
 */
nir_src_rewrite(&intrin->src[0], first_sample_id);
|
||||||
|
|
||||||
|
/* Invocations that ran the intrinsic leave the loop. */
nir_jump(b, nir_jump_break);
|
||||||
|
|
||||||
|
/* NOTE(review): the if and loop pushed above are not popped in this
 * function; presumably fine because the cursor is reassigned on each
 * rewrite - confirm against nir_builder.
 */
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Pass entry point: runs
 * brw_nir_lower_non_uniform_barycentric_at_sample_instr() over the shader
 * through nir_shader_instructions_pass() and returns that call's result.
 *
 * nir - shader to process.
 */
bool
|
||||||
|
brw_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir)
|
||||||
|
{
|
||||||
|
return nir_shader_instructions_pass(
|
||||||
|
nir,
|
||||||
|
brw_nir_lower_non_uniform_barycentric_at_sample_instr,
|
||||||
|
/* No metadata is declared as preserved; presumably because the callback
 * adds loops and ifs - TODO confirm.
 */
nir_metadata_none,
|
||||||
|
/* The callback takes no user data. */
NULL);
|
||||||
|
}
|
@@ -92,6 +92,7 @@ libintel_compiler_files = files(
|
|||||||
'brw_nir_lower_alpha_to_coverage.c',
|
'brw_nir_lower_alpha_to_coverage.c',
|
||||||
'brw_nir_lower_intersection_shader.c',
|
'brw_nir_lower_intersection_shader.c',
|
||||||
'brw_nir_lower_non_uniform_resource_intel.c',
|
'brw_nir_lower_non_uniform_resource_intel.c',
|
||||||
|
'brw_nir_lower_non_uniform_barycentric_at_sample.c',
|
||||||
'brw_nir_lower_ray_queries.c',
|
'brw_nir_lower_ray_queries.c',
|
||||||
'brw_nir_lower_rt_intrinsics.c',
|
'brw_nir_lower_rt_intrinsics.c',
|
||||||
'brw_nir_lower_shader_calls.c',
|
'brw_nir_lower_shader_calls.c',
|
||||||
|
Reference in New Issue
Block a user