intel/fs: Do a stalling MFENCE in endInvocationInterlock()

Fixes: 939312702e "i965: Add ARB_fragment_shader_interlock support"
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Jason Ekstrand
2019-05-22 12:36:17 -05:00
parent 859de4a748
commit 9e403dc56e
5 changed files with 28 additions and 8 deletions

View File

@@ -1114,7 +1114,8 @@ void
brw_memory_fence(struct brw_codegen *p, brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst, struct brw_reg dst,
struct brw_reg src, struct brw_reg src,
enum opcode send_op); enum opcode send_op,
bool stall);
void void
brw_pixel_interpolator_query(struct brw_codegen *p, brw_pixel_interpolator_query(struct brw_codegen *p,

View File

@@ -3038,10 +3038,11 @@ void
brw_memory_fence(struct brw_codegen *p, brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst, struct brw_reg dst,
struct brw_reg src, struct brw_reg src,
enum opcode send_op) enum opcode send_op,
bool stall)
{ {
const struct gen_device_info *devinfo = p->devinfo; const struct gen_device_info *devinfo = p->devinfo;
const bool commit_enable = const bool commit_enable = stall ||
devinfo->gen >= 10 || /* HSD ES # 1404612949 */ devinfo->gen >= 10 || /* HSD ES # 1404612949 */
(devinfo->gen == 7 && !devinfo->is_haswell); (devinfo->gen == 7 && !devinfo->is_haswell);
struct brw_inst *insn; struct brw_inst *insn;
@@ -3080,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p,
brw_MOV(p, dst, offset(dst, 1)); brw_MOV(p, dst, offset(dst, 1));
} }
if (stall)
brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
brw_pop_insn_state(p); brw_pop_insn_state(p);
} }

View File

@@ -2071,13 +2071,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break; break;
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND); assert(src[1].file == BRW_IMMEDIATE_VALUE);
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud);
break; break;
case SHADER_OPCODE_INTERLOCK: case SHADER_OPCODE_INTERLOCK:
assert(devinfo->gen >= 9); assert(devinfo->gen >= 9);
/* The interlock is basically a memory fence issued via sendc */ /* The interlock is basically a memory fence issued via sendc */
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC); brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false);
break; break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: { case SHADER_OPCODE_FIND_LIVE_CHANNEL: {

View File

@@ -4273,7 +4273,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_memory_barrier: { case nir_intrinsic_memory_barrier: {
const fs_builder ubld = bld.group(8, 0); const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0)) ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
brw_vec8_grf(0, 0), brw_imm_ud(0))
->size_written = 2 * REG_SIZE; ->size_written = 2 * REG_SIZE;
break; break;
} }
@@ -5080,7 +5081,20 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
} }
case nir_intrinsic_end_invocation_interlock: { case nir_intrinsic_end_invocation_interlock: {
/* We don't need to do anything here */ /* For endInvocationInterlock(), we need to insert a memory fence which
* stalls in the shader until the memory transactions prior to that
* fence are complete. This ensures that the shader does not end before
* any writes from its critical section have landed. Otherwise, you can
* end up with a case where the next invocation on that pixel properly
* stalls until the previous FS invocation on its pixel completes but
* doesn't actually wait for the dataport memory transactions from that
* thread to land before submitting its own.
*/
const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
brw_vec8_grf(0, 0), brw_imm_ud(1))
->size_written = 2 * REG_SIZE;
break; break;
} }

View File

@@ -1886,7 +1886,7 @@ generate_code(struct brw_codegen *p,
break; break;
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND); brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false);
break; break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: { case SHADER_OPCODE_FIND_LIVE_CHANNEL: {