intel/fs: Do a stalling MFENCE in endInvocationInterlock()
Fixes: 939312702e ("i965: Add ARB_fragment_shader_interlock support")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -1114,7 +1114,8 @@ void
|
||||
brw_memory_fence(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src,
|
||||
enum opcode send_op);
|
||||
enum opcode send_op,
|
||||
bool stall);
|
||||
|
||||
void
|
||||
brw_pixel_interpolator_query(struct brw_codegen *p,
|
||||
|
@@ -3038,10 +3038,11 @@ void
|
||||
brw_memory_fence(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src,
|
||||
enum opcode send_op)
|
||||
enum opcode send_op,
|
||||
bool stall)
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
const bool commit_enable =
|
||||
const bool commit_enable = stall ||
|
||||
devinfo->gen >= 10 || /* HSD ES # 1404612949 */
|
||||
(devinfo->gen == 7 && !devinfo->is_haswell);
|
||||
struct brw_inst *insn;
|
||||
@@ -3080,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p,
|
||||
brw_MOV(p, dst, offset(dst, 1));
|
||||
}
|
||||
|
||||
if (stall)
|
||||
brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
|
@@ -2071,13 +2071,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND);
|
||||
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_INTERLOCK:
|
||||
assert(devinfo->gen >= 9);
|
||||
/* The interlock is basically a memory fence issued via sendc */
|
||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC);
|
||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
||||
|
@@ -4273,7 +4273,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
case nir_intrinsic_memory_barrier: {
|
||||
const fs_builder ubld = bld.group(8, 0);
|
||||
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
||||
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0))
|
||||
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
|
||||
brw_vec8_grf(0, 0), brw_imm_ud(0))
|
||||
->size_written = 2 * REG_SIZE;
|
||||
break;
|
||||
}
|
||||
@@ -5080,7 +5081,20 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
}
|
||||
|
||||
case nir_intrinsic_end_invocation_interlock: {
|
||||
/* We don't need to do anything here */
|
||||
/* For endInvocationInterlock(), we need to insert a memory fence which
|
||||
* stalls in the shader until the memory transactions prior to that
|
||||
* fence are complete. This ensures that the shader does not end before
|
||||
* any writes from its critical section have landed. Otherwise, you can
|
||||
* end up with a case where the next invocation on that pixel properly
|
||||
* stalls for the previous FS invocation on its pixel to complete but
|
||||
* doesn't actually wait for the dataport memory transactions from that
|
||||
* thread to land before submitting its own.
|
||||
*/
|
||||
const fs_builder ubld = bld.group(8, 0);
|
||||
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
||||
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
|
||||
brw_vec8_grf(0, 0), brw_imm_ud(1))
|
||||
->size_written = 2 * REG_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -1886,7 +1886,7 @@ generate_code(struct brw_codegen *p,
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND);
|
||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
||||
|
Reference in New Issue
Block a user