diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 2924650c8ed..9ae36e9f89b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6360,6 +6360,35 @@ needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
    return false;
 }
 
+/* Wa_14017989577
+ *
+ * The first instruction of any kernel should have a non-zero emask.
+ * For verx10 >= 120, make sure of this by inserting a dummy exec_all mov.
+ */
+void
+fs_visitor::emit_dummy_mov_instruction()
+{
+   if (devinfo->verx10 < 120)
+      return;
+
+   struct backend_instruction *first_inst =
+      cfg->first_block()->start();
+
+   /* We can skip the WA if the first instruction is marked with
+    * force_writemask_all or its exec_size equals dispatch_width.
+    */
+   if (first_inst->force_writemask_all ||
+       first_inst->exec_size == dispatch_width)
+      return;
+
+   /* Insert a dummy mov of 0 to the null register as first instruction. */
+   const fs_builder ubld =
+      bld.at(cfg->first_block(), first_inst).exec_all().group(8, 0);
+   ubld.MOV(bld.null_reg_ud(), brw_imm_ud(0u));
+
+   invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
+}
+
 /* Wa_22013689345
  *
  * We need to emit UGM fence message before EOT, if shader has any UGM write
@@ -6720,6 +6749,10 @@ fs_visitor::run_vs()
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(true /* allow_spilling */);
 
    return !failed;
@@ -6842,6 +6875,10 @@ fs_visitor::run_tcs()
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(true /* allow_spilling */);
 
    return !failed;
@@ -6870,6 +6907,10 @@ fs_visitor::run_tes()
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(true /* allow_spilling */);
 
    return !failed;
@@ -6914,6 +6955,10 @@ fs_visitor::run_gs()
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(true /* allow_spilling */);
 
    return !failed;
@@ -7014,6 +7059,9 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
       fixup_3src_null_dest();
       emit_dummy_memory_fence_before_eot();
 
+      /* Wa_14017989577 */
+      emit_dummy_mov_instruction();
+
       allocate_registers(allow_spilling);
    }
 
@@ -7050,6 +7098,10 @@ fs_visitor::run_cs(bool allow_spilling)
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(allow_spilling);
 
    return !failed;
@@ -7078,6 +7130,10 @@ fs_visitor::run_bs(bool allow_spilling)
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(allow_spilling);
 
    return !failed;
@@ -7107,6 +7163,10 @@ fs_visitor::run_task(bool allow_spilling)
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(allow_spilling);
 
    return !failed;
@@ -7136,6 +7196,10 @@ fs_visitor::run_mesh(bool allow_spilling)
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(allow_spilling);
 
    return !failed;
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index bbc83ac7bfc..d42d9ef901d 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -224,6 +224,7 @@ public:
    bool fixup_sends_duplicate_payload();
    void fixup_3src_null_dest();
    void emit_dummy_memory_fence_before_eot();
+   void emit_dummy_mov_instruction();
    bool fixup_nomask_control_flow();
    void assign_curb_setup();
    void assign_urb_setup();