intel/brw: Reduce scope of some TCS specific functions

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30169>
This commit is contained in:
Caio Oliveira
2024-07-12 16:13:14 -07:00
committed by Marge Bot
parent 47b9dc9070
commit f9ddf51b70
3 changed files with 87 additions and 90 deletions

View File

@@ -42,6 +42,90 @@ get_patch_count_threshold(int input_control_points)
return 1;
}
/**
 * Emit the instructions that compute gl_InvocationID for a tessellation
 * control shader and record the resulting register in s.invocation_id.
 *
 * The instance number is extracted from a platform-dependent bit-field of
 * g0.2.  In multi-patch dispatch the shifted-down instance number is used
 * directly.  In single-patch dispatch the per-channel index <76543210> is
 * used, with 8 * instance added when the program has more than one instance.
 *
 * \param s  visitor being built; instructions are appended at its end.
 */
static void
brw_set_tcs_invocation_id(fs_visitor &s)
{
   const struct intel_device_info *devinfo = s.devinfo;
   struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(s.prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;

   const fs_builder bld = fs_builder(&s).at_end();

   /* Where the instance number lives inside g0.2:
    *
    *    DG2+ (verx10 >= 125)   bits  7:0
    *    gfx11+                 bits 22:16
    *    everything older       bits 23:17
    */
   unsigned id_mask;
   unsigned id_shift;
   if (devinfo->verx10 >= 125) {
      id_mask = INTEL_MASK(7, 0);
      id_shift = 0;
   } else if (devinfo->ver >= 11) {
      id_mask = INTEL_MASK(22, 16);
      id_shift = 16;
   } else {
      id_mask = INTEL_MASK(23, 17);
      id_shift = 17;
   }

   /* Isolate the (still unshifted) instance number field. */
   brw_reg instance_bits =
      bld.AND(brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)),
              brw_imm_ud(id_mask));

   if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) {
      /* The thread number is gl_InvocationID; just shift it into place. */
      s.invocation_id = bld.SHR(instance_bits, brw_imm_ud(id_shift));
      return;
   }

   assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH);

   /* Build the per-channel index vector <76543210>: load the packed
    * immediate as UW, then widen it to UD.
    */
   brw_reg lane_uw = bld.vgrf(BRW_TYPE_UW);
   brw_reg lane_ud = bld.vgrf(BRW_TYPE_UD);
   bld.MOV(lane_uw, brw_reg(brw_imm_uv(0x76543210)));
   bld.MOV(lane_ud, lane_uw);

   if (tcs_prog_data->instances != 1) {
      /* invocation_id = 8 * instance + <76543210>.  Shifting right by three
       * fewer bits than the field offset leaves the instance number already
       * multiplied by eight.
       *
       * NOTE(review): id_shift is 0 when verx10 >= 125, so the subtraction
       * below would wrap.  Presumably those platforms never reach this
       * single-patch, multi-instance path -- confirm against the caller.
       */
      brw_reg instance_times_8 =
         bld.SHR(instance_bits, brw_imm_ud(id_shift - 3));
      s.invocation_id = bld.ADD(instance_times_8, lane_ud);
   } else {
      /* A single instance: the channel index alone is the invocation ID. */
      s.invocation_id = lane_ud;
   }
}
/**
 * Make sure the TCS program ends with an instruction carrying EOT.
 *
 * If an existing URB write can be marked as the final message, nothing new
 * is emitted.  Otherwise a URB write with EOT set is appended.
 *
 * \param s  visitor being built; any new instruction is appended at its end.
 */
static void
brw_emit_tcs_thread_end(fs_visitor &s)
{
   /* Cheapest option first: tag the last URB write with EOT rather than
    * emitting a separate write whose only job is to finish the thread.
    * Such a write is not guaranteed to exist, so this can fail.
    */
   const bool tagged_existing_write = s.mark_last_urb_write_with_eot();

   if (!tagged_existing_write) {
      const fs_builder bld = fs_builder(&s).at_end();

      /* Fall back to a dedicated URB write that ends the thread.  On
       * Broadwell this writes zero to the "TR DS Cache Disable" bit (no
       * fancy algorithm to set it optimally has been implemented).  On
       * other platforms it writes zero to a reserved/MBZ patch header
       * DWord, which has no consequence.
       */
      brw_reg urb_srcs[URB_LOGICAL_NUM_SRCS];
      urb_srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output;
      urb_srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
      urb_srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
      urb_srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);

      fs_inst *final_write = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
                                      reg_undef, urb_srcs,
                                      ARRAY_SIZE(urb_srcs));
      final_write->eot = true;
   }
}
/**
 * Convert the ATTR-file sources of every instruction in a tessellation
 * control program into hardware registers.
 *
 * \param s  visitor whose CFG is walked; its stage must be
 *           MESA_SHADER_TESS_CTRL.
 */
static void
brw_assign_tcs_urb_setup(fs_visitor &s)
{
   assert(s.stage == MESA_SHADER_TESS_CTRL);

   /* Walk the whole CFG and replace each ATTR reference with a HW_REG. */
   foreach_block_and_inst(block, fs_inst, inst, s.cfg)
      s.convert_attr_sources_to_hw_regs(inst);
}
static bool
run_tcs(fs_visitor &s)
{
@@ -56,7 +140,7 @@ run_tcs(fs_visitor &s)
s.payload_ = new tcs_thread_payload(s);
/* Initialize gl_InvocationID */
s.set_tcs_invocation_id();
brw_set_tcs_invocation_id(s);
const bool fix_dispatch_mask =
vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH &&
@@ -75,7 +159,7 @@ run_tcs(fs_visitor &s)
bld.emit(BRW_OPCODE_ENDIF);
}
s.emit_tcs_thread_end();
brw_emit_tcs_thread_end(s);
if (s.failed)
return false;
@@ -85,7 +169,7 @@ run_tcs(fs_visitor &s)
brw_fs_optimize(s);
s.assign_curb_setup();
s.assign_tcs_urb_setup();
brw_assign_tcs_urb_setup(s);
brw_fs_lower_3src_null_dest(s);
brw_fs_workaround_memory_fence_before_eot(s);

View File

@@ -1357,17 +1357,6 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
}
}
/**
 * Rewrite ATTR-file sources for the tessellation control stage by calling
 * convert_attr_sources_to_hw_regs() on each instruction visited by
 * foreach_block_and_inst() over this visitor's cfg.
 *
 * Asserts that the visitor's stage is MESA_SHADER_TESS_CTRL.
 */
void
fs_visitor::assign_tcs_urb_setup()
{
assert(stage == MESA_SHADER_TESS_CTRL);
/* Rewrite all ATTR file references to HW_REGs. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
convert_attr_sources_to_hw_regs(inst);
}
}
void
fs_visitor::assign_tes_urb_setup()
{
@@ -2394,78 +2383,6 @@ fs_visitor::allocate_registers(bool allow_spilling)
brw_fs_lower_scoreboard(*this);
}
/**
 * Emit the instructions that compute gl_InvocationID for a tessellation
 * control shader and store the resulting register in invocation_id.
 *
 * The instance number is masked out of g0.2.  For multi-patch dispatch it is
 * shifted down and used directly; for single-patch dispatch the per-channel
 * index <76543210> is used, plus 8 * instance when there is more than one
 * instance.
 */
void
fs_visitor::set_tcs_invocation_id()
{
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
/* New instructions are appended at the end of the program. */
const fs_builder bld = fs_builder(this).at_end();
/* Mask and shift must describe the same bit-field; see the table below. */
const unsigned instance_id_mask =
(devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) :
(devinfo->ver >= 11) ? INTEL_MASK(22, 16) :
INTEL_MASK(23, 17);
const unsigned instance_id_shift =
(devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 16 : 17;
/* Get instance number from g0.2 bits:
 * * 7:0 on DG2+
 * * 22:16 on gfx11+
 * * 23:17 otherwise
 */
/* t holds the masked, still unshifted, instance number field. */
brw_reg t =
bld.AND(brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)),
brw_imm_ud(instance_id_mask));
if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) {
/* gl_InvocationID is just the thread number */
invocation_id = bld.SHR(t, brw_imm_ud(instance_id_shift));
return;
}
assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH);
/* Build the per-channel index <76543210>: load the packed immediate as UW,
 * then widen it to UD.
 */
brw_reg channels_uw = bld.vgrf(BRW_TYPE_UW);
brw_reg channels_ud = bld.vgrf(BRW_TYPE_UD);
bld.MOV(channels_uw, brw_reg(brw_imm_uv(0x76543210)));
bld.MOV(channels_ud, channels_uw);
if (tcs_prog_data->instances == 1) {
/* Single instance: the channel index alone is the invocation ID. */
invocation_id = channels_ud;
} else {
/* instance_id = 8 * t + <76543210> */
/* Shifting by (shift - 3) leaves the instance number multiplied by 8.
 * NOTE(review): instance_id_shift is 0 when verx10 >= 125, so this
 * subtraction would wrap; presumably that path is never taken on those
 * platforms -- confirm against the caller.
 */
invocation_id =
bld.ADD(bld.SHR(t, brw_imm_ud(instance_id_shift - 3)), channels_ud);
}
}
/**
 * Make sure the TCS program ends with an instruction carrying EOT.
 *
 * Returns early when mark_last_urb_write_with_eot() succeeds; otherwise
 * appends a URB write with its eot flag set.
 */
void
fs_visitor::emit_tcs_thread_end()
{
/* Try and tag the last URB write with EOT instead of emitting a whole
 * separate write just to finish the thread. There isn't guaranteed to
 * be one, so this may not succeed.
 */
if (mark_last_urb_write_with_eot())
return;
/* New instructions are appended at the end of the program. */
const fs_builder bld = fs_builder(this).at_end();
/* Emit a URB write to end the thread. On Broadwell, we use this to write
 * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy
 * algorithm to set it optimally). On other platforms, we simply write
 * zero to a reserved/MBZ patch header DWord which has no consequence.
 */
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
/* Handle: the patch URB output handle taken from the TCS payload. */
srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output;
/* One component of zero data, with WRITEMASK_X << 16 as the channel mask. */
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
reg_undef, srcs, ARRAY_SIZE(srcs));
/* Mark this write as the thread's final message. */
inst->eot = true;
}
/**
* Move load_interpolated_input with simple (payload-based) barycentric modes
* to the top of the program so we don't emit multiple PLNs for the same input.

View File

@@ -305,7 +305,6 @@ public:
uint32_t compute_max_register_pressure();
void assign_curb_setup();
void convert_attr_sources_to_hw_regs(fs_inst *inst);
void assign_tcs_urb_setup();
void assign_tes_urb_setup();
bool assign_regs(bool allow_spilling, bool spill_all);
void assign_regs_trivial();
@@ -325,14 +324,11 @@ public:
void fail(const char *msg, ...);
void limit_dispatch_width(unsigned n, const char *msg);
void set_tcs_invocation_id();
void emit_urb_writes(const brw_reg &gs_vertex_count = brw_reg());
void emit_gs_control_data_bits(const brw_reg &vertex_count);
brw_reg gs_urb_channel_mask(const brw_reg &dword_index);
brw_reg gs_urb_per_slot_dword_index(const brw_reg &vertex_count);
bool mark_last_urb_write_with_eot();
void emit_tcs_thread_end();
void emit_urb_fence();
void emit_cs_terminate();