intel/fs: Do a stalling MFENCE in endInvocationInterlock()

Fixes: 939312702e "i965: Add ARB_fragment_shader_interlock support"
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Jason Ekstrand
2019-05-22 12:36:17 -05:00
parent 859de4a748
commit 9e403dc56e
5 changed files with 28 additions and 8 deletions

View File

@@ -1114,7 +1114,8 @@ void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg src,
enum opcode send_op);
enum opcode send_op,
bool stall);
void
brw_pixel_interpolator_query(struct brw_codegen *p,

View File

@@ -3038,10 +3038,11 @@ void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg src,
enum opcode send_op)
enum opcode send_op,
bool stall)
{
const struct gen_device_info *devinfo = p->devinfo;
const bool commit_enable =
const bool commit_enable = stall ||
devinfo->gen >= 10 || /* HSD ES # 1404612949 */
(devinfo->gen == 7 && !devinfo->is_haswell);
struct brw_inst *insn;
@@ -3080,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p,
brw_MOV(p, dst, offset(dst, 1));
}
if (stall)
brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
brw_pop_insn_state(p);
}

View File

@@ -2071,13 +2071,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND);
assert(src[1].file == BRW_IMMEDIATE_VALUE);
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud);
break;
case SHADER_OPCODE_INTERLOCK:
assert(devinfo->gen >= 9);
/* The interlock is basically a memory fence issued via sendc */
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC);
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {

View File

@@ -4273,7 +4273,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_memory_barrier: {
const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0))
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
brw_vec8_grf(0, 0), brw_imm_ud(0))
->size_written = 2 * REG_SIZE;
break;
}
@@ -5080,7 +5081,20 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
case nir_intrinsic_end_invocation_interlock: {
/* We don't need to do anything here */
/* For endInvocationInterlock(), we need to insert a memory fence which
* stalls in the shader until the memory transactions prior to that
* fence are complete. This ensures that the shader does not end before
* any writes from its critical section have landed. Otherwise, you can
* end up with a case where the next invocation on that pixel properly
* stalls for the previous FS invocation on its pixel to complete but
* doesn't actually wait for the dataport memory transactions from that
* thread to land before submitting its own.
*/
const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
brw_vec8_grf(0, 0), brw_imm_ud(1))
->size_written = 2 * REG_SIZE;
break;
}

View File

@@ -1886,7 +1886,7 @@ generate_code(struct brw_codegen *p,
break;
case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND);
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {