intel/fs: Do a stalling MFENCE in endInvocationInterlock()
Fixes: 939312702e "i965: Add ARB_fragment_shader_interlock support"
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -1114,7 +1114,8 @@ void
|
|||||||
brw_memory_fence(struct brw_codegen *p,
|
brw_memory_fence(struct brw_codegen *p,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg src,
|
struct brw_reg src,
|
||||||
enum opcode send_op);
|
enum opcode send_op,
|
||||||
|
bool stall);
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_pixel_interpolator_query(struct brw_codegen *p,
|
brw_pixel_interpolator_query(struct brw_codegen *p,
|
||||||
|
@@ -3038,10 +3038,11 @@ void
|
|||||||
brw_memory_fence(struct brw_codegen *p,
|
brw_memory_fence(struct brw_codegen *p,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
struct brw_reg src,
|
struct brw_reg src,
|
||||||
enum opcode send_op)
|
enum opcode send_op,
|
||||||
|
bool stall)
|
||||||
{
|
{
|
||||||
const struct gen_device_info *devinfo = p->devinfo;
|
const struct gen_device_info *devinfo = p->devinfo;
|
||||||
const bool commit_enable =
|
const bool commit_enable = stall ||
|
||||||
devinfo->gen >= 10 || /* HSD ES # 1404612949 */
|
devinfo->gen >= 10 || /* HSD ES # 1404612949 */
|
||||||
(devinfo->gen == 7 && !devinfo->is_haswell);
|
(devinfo->gen == 7 && !devinfo->is_haswell);
|
||||||
struct brw_inst *insn;
|
struct brw_inst *insn;
|
||||||
@@ -3080,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p,
|
|||||||
brw_MOV(p, dst, offset(dst, 1));
|
brw_MOV(p, dst, offset(dst, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (stall)
|
||||||
|
brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
|
||||||
|
|
||||||
brw_pop_insn_state(p);
|
brw_pop_insn_state(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -2071,13 +2071,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_MEMORY_FENCE:
|
case SHADER_OPCODE_MEMORY_FENCE:
|
||||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND);
|
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
||||||
|
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_INTERLOCK:
|
case SHADER_OPCODE_INTERLOCK:
|
||||||
assert(devinfo->gen >= 9);
|
assert(devinfo->gen >= 9);
|
||||||
/* The interlock is basically a memory fence issued via sendc */
|
/* The interlock is basically a memory fence issued via sendc */
|
||||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC);
|
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
||||||
|
@@ -4273,7 +4273,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
case nir_intrinsic_memory_barrier: {
|
case nir_intrinsic_memory_barrier: {
|
||||||
const fs_builder ubld = bld.group(8, 0);
|
const fs_builder ubld = bld.group(8, 0);
|
||||||
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
||||||
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0))
|
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
|
||||||
|
brw_vec8_grf(0, 0), brw_imm_ud(0))
|
||||||
->size_written = 2 * REG_SIZE;
|
->size_written = 2 * REG_SIZE;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -5080,7 +5081,20 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
}
|
}
|
||||||
|
|
||||||
case nir_intrinsic_end_invocation_interlock: {
|
case nir_intrinsic_end_invocation_interlock: {
|
||||||
/* We don't need to do anything here */
|
/* For endInvocationInterlock(), we need to insert a memory fence which
|
||||||
|
* stalls in the shader until the memory transactions prior to that
|
||||||
|
* fence are complete. This ensures that the shader does not end before
|
||||||
|
* any writes from its critical section have landed. Otherwise, you can
|
||||||
|
* end up with a case where the next invocation on that pixel properly
|
||||||
|
* stalls for the previous FS invocation on its pixel to complete but
|
||||||
|
* doesn't actually wait for the dataport memory transactions from that
|
||||||
|
* thread to land before submitting its own.
|
||||||
|
*/
|
||||||
|
const fs_builder ubld = bld.group(8, 0);
|
||||||
|
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
|
||||||
|
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
|
||||||
|
brw_vec8_grf(0, 0), brw_imm_ud(1))
|
||||||
|
->size_written = 2 * REG_SIZE;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1886,7 +1886,7 @@ generate_code(struct brw_codegen *p,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_MEMORY_FENCE:
|
case SHADER_OPCODE_MEMORY_FENCE:
|
||||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND);
|
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
||||||
|
Reference in New Issue
Block a user