intel/compiler: insert URB fence before task/mesh termination

Bspec 53421 says:
"A URB fence memory is typically performed prior the thread
exit message, so that the next thread dispatch that reads
that URB memory will see it."

Cc: 22.1 <mesa-stable>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16665>
This commit is contained in:
Marcin Ślusarz
2022-05-18 17:05:53 +02:00
committed by Marge Bot
parent 30c0f2bfbb
commit 7ebae85955
3 changed files with 24 additions and 0 deletions

View File

@@ -7039,6 +7039,8 @@ fs_visitor::run_task(bool allow_spilling)
if (failed)
return false;
emit_urb_fence();
emit_cs_terminate();
calculate_cfg();
@@ -7084,6 +7086,8 @@ fs_visitor::run_mesh(bool allow_spilling)
if (failed)
return false;
emit_urb_fence();
emit_cs_terminate();
calculate_cfg();

View File

@@ -327,6 +327,7 @@ public:
void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
unsigned base_offset, const nir_src &offset_src,
unsigned num_components, unsigned first_component);
void emit_urb_fence();
void emit_cs_terminate();
fs_reg emit_work_group_id_setup();

View File

@@ -27,6 +27,7 @@
* makes it easier to do backend-specific optimizations than doing so
* in the GLSL IR or in the native code.
*/
#include "brw_eu.h"
#include "brw_fs.h"
#include "compiler/glsl_types.h"
@@ -1045,6 +1046,24 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
}
}
void
fs_visitor::emit_urb_fence()
{
fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
brw_vec8_grf(0, 0),
brw_imm_ud(true),
brw_imm_ud(0));
fence->sfid = BRW_SFID_URB;
fence->desc = lsc_fence_msg_desc(devinfo, LSC_FENCE_LOCAL,
LSC_FLUSH_TYPE_NONE, true);
bld.exec_all().group(1, 0).emit(FS_OPCODE_SCHEDULING_FENCE,
bld.null_reg_ud(),
&dst,
1);
}
void
fs_visitor::emit_cs_terminate()
{