intel/compiler: Use an existing URB write to end TCS threads when viable
VS, TCS, TES, and GS threads must end with a URB write message with the EOT (end of thread) bit set. For VS and TES, we shadow output variables with temporaries and perform all stores at the end of the shader, giving us an existing message to do the EOT. In tessellation control shaders, we don't defer output stores until the end of the thread like we do for vertex or evaluation shaders. We just process store_output and store_per_vertex_output intrinsics where they occur, which may be in control flow. So we can't guarantee that there's a URB write at the end of the shader. Traditionally, we've just emitted a separate URB write to finish TCS threads, doing a writemasked write to a single patch header DWord. On Broadwell, we need to set a "TR DS Cache Disable" bit, so this is a convenient spot to do so. But on other platforms, there's no such field, and this write is purely wasteful. Instead of emitting a separate write, we can just look for an existing URB write at the end of the program and tag that with EOT, if possible. We already had code to do this for geometry shaders, so just lift it into a helper function and reuse it. No changes in shader-db. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17944>
This commit is contained in:
@@ -1511,6 +1511,34 @@ fs_visitor::resolve_source_modifiers(const fs_reg &src)
|
|||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Walk backwards from the end of the program looking for a URB write that
|
||||||
|
* isn't in control flow, and mark it with EOT.
|
||||||
|
*
|
||||||
|
* Return true if successful or false if a separate EOT write is needed.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
fs_visitor::mark_last_urb_write_with_eot()
|
||||||
|
{
|
||||||
|
foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
|
||||||
|
if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) {
|
||||||
|
prev->eot = true;
|
||||||
|
|
||||||
|
/* Delete now dead instructions. */
|
||||||
|
foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) {
|
||||||
|
if (dead == prev)
|
||||||
|
break;
|
||||||
|
dead->remove();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else if (prev->is_control_flow() || prev->has_side_effects()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::emit_gs_thread_end()
|
fs_visitor::emit_gs_thread_end()
|
||||||
{
|
{
|
||||||
@@ -1526,21 +1554,12 @@ fs_visitor::emit_gs_thread_end()
|
|||||||
fs_inst *inst;
|
fs_inst *inst;
|
||||||
|
|
||||||
if (gs_prog_data->static_vertex_count != -1) {
|
if (gs_prog_data->static_vertex_count != -1) {
|
||||||
foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
|
/* Try and tag the last URB write with EOT instead of emitting a whole
|
||||||
if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) {
|
* separate write just to finish the thread.
|
||||||
prev->eot = true;
|
*/
|
||||||
|
if (mark_last_urb_write_with_eot())
|
||||||
|
return;
|
||||||
|
|
||||||
/* Delete now dead instructions. */
|
|
||||||
foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) {
|
|
||||||
if (dead == prev)
|
|
||||||
break;
|
|
||||||
dead->remove();
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
} else if (prev->is_control_flow() || prev->has_side_effects()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
|
||||||
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||||
@@ -6555,6 +6574,31 @@ fs_visitor::set_tcs_invocation_id()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fs_visitor::emit_tcs_thread_end()
|
||||||
|
{
|
||||||
|
/* Try and tag the last URB write with EOT instead of emitting a whole
|
||||||
|
* separate write just to finish the thread. There isn't guaranteed to
|
||||||
|
* be one, so this may not succeed.
|
||||||
|
*/
|
||||||
|
if (devinfo->ver != 8 && mark_last_urb_write_with_eot())
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Emit a URB write to end the thread. On Broadwell, we use this to write
|
||||||
|
* zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy
|
||||||
|
* algorithm to set it optimally). On other platforms, we simply write
|
||||||
|
* zero to a reserved/MBZ patch header DWord which has no consequence.
|
||||||
|
*/
|
||||||
|
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||||
|
srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output;
|
||||||
|
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
|
||||||
|
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
|
||||||
|
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||||
|
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||||
|
inst->mlen = 3;
|
||||||
|
inst->eot = true;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_visitor::run_tcs()
|
fs_visitor::run_tcs()
|
||||||
{
|
{
|
||||||
@@ -6587,15 +6631,7 @@ fs_visitor::run_tcs()
|
|||||||
bld.emit(BRW_OPCODE_ENDIF);
|
bld.emit(BRW_OPCODE_ENDIF);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Emit EOT write; set TR DS Cache bit */
|
emit_tcs_thread_end();
|
||||||
fs_reg srcs[URB_LOGICAL_NUM_SRCS];
|
|
||||||
srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output;
|
|
||||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
|
|
||||||
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
|
|
||||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
|
||||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
|
||||||
inst->mlen = 3;
|
|
||||||
inst->eot = true;
|
|
||||||
|
|
||||||
if (failed)
|
if (failed)
|
||||||
return false;
|
return false;
|
||||||
|
@@ -412,6 +412,8 @@ public:
|
|||||||
void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
|
void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
|
||||||
unsigned base_offset, const nir_src &offset_src,
|
unsigned base_offset, const nir_src &offset_src,
|
||||||
unsigned num_components, unsigned first_component);
|
unsigned num_components, unsigned first_component);
|
||||||
|
bool mark_last_urb_write_with_eot();
|
||||||
|
void emit_tcs_thread_end();
|
||||||
void emit_urb_fence();
|
void emit_urb_fence();
|
||||||
void emit_cs_terminate();
|
void emit_cs_terminate();
|
||||||
fs_reg emit_work_group_id_setup();
|
fs_reg emit_work_group_id_setup();
|
||||||
|
Reference in New Issue
Block a user