i965: Add ARB_fragment_shader_interlock support.

Adds support for ARB_fragment_shader_interlock. We achieve
the interlock and fragment ordering by issuing a memory fence
via sendc.

Signed-off-by: Plamena Manolova <plamena.manolova@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
Plamena Manolova
2018-04-27 15:06:56 +01:00
parent 60e843c4d5
commit 939312702e
10 changed files with 37 additions and 8 deletions

View File

@@ -300,7 +300,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_cl_event not started GL_ARB_cl_event not started
GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi) GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi)
GL_ARB_ES3_2_compatibility DONE (i965/gen8+) GL_ARB_ES3_2_compatibility DONE (i965/gen8+)
GL_ARB_fragment_shader_interlock not started GL_ARB_fragment_shader_interlock DONE (i965)
GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe) GL_ARB_gpu_shader_int64 DONE (i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe)
GL_ARB_parallel_shader_compile not started, but Chia-I Wu did some related work in 2014 GL_ARB_parallel_shader_compile not started, but Chia-I Wu did some related work in 2014
GL_ARB_post_depth_coverage DONE (i965, nvc0) GL_ARB_post_depth_coverage DONE (i965, nvc0)

View File

@@ -44,7 +44,7 @@ Note: some of the new features are only available with certain drivers.
</p> </p>
<ul> <ul>
<li>TBD</li> <li>GL_ARB_fragment_shader_interlock on i965</li>
</ul> </ul>
<h2>Bug fixes</h2> <h2>Bug fixes</h2>

View File

@@ -509,7 +509,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
void void
brw_memory_fence(struct brw_codegen *p, brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst); struct brw_reg dst,
enum opcode send_op);
void void
brw_pixel_interpolator_query(struct brw_codegen *p, brw_pixel_interpolator_query(struct brw_codegen *p,

View File

@@ -480,6 +480,8 @@ enum opcode {
SHADER_OPCODE_GET_BUFFER_SIZE, SHADER_OPCODE_GET_BUFFER_SIZE,
SHADER_OPCODE_INTERLOCK,
VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_MOV_BYTES,
VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM, VEC4_OPCODE_UNPACK_UNIFORM,

View File

@@ -3288,7 +3288,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
void void
brw_memory_fence(struct brw_codegen *p, brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst) struct brw_reg dst,
enum opcode send_op)
{ {
const struct gen_device_info *devinfo = p->devinfo; const struct gen_device_info *devinfo = p->devinfo;
const bool commit_enable = const bool commit_enable =
@@ -3304,7 +3305,7 @@ brw_memory_fence(struct brw_codegen *p,
/* Set dst as destination for dependency tracking, the MEMORY_FENCE /* Set dst as destination for dependency tracking, the MEMORY_FENCE
* message doesn't write anything back. * message doesn't write anything back.
*/ */
insn = next_insn(p, BRW_OPCODE_SEND); insn = next_insn(p, send_op);
dst = retype(dst, BRW_REGISTER_TYPE_UW); dst = retype(dst, BRW_REGISTER_TYPE_UW);
brw_set_dest(p, insn, dst); brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, dst); brw_set_src0(p, insn, dst);
@@ -3316,7 +3317,7 @@ brw_memory_fence(struct brw_codegen *p,
* flush it too. Use a different register so both flushes can be * flush it too. Use a different register so both flushes can be
* pipelined by the hardware. * pipelined by the hardware.
*/ */
insn = next_insn(p, BRW_OPCODE_SEND); insn = next_insn(p, send_op);
brw_set_dest(p, insn, offset(dst, 1)); brw_set_dest(p, insn, offset(dst, 1));
brw_set_src0(p, insn, offset(dst, 1)); brw_set_src0(p, insn, offset(dst, 1));
brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,

View File

@@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break; break;
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst); brw_memory_fence(p, dst, BRW_OPCODE_SEND);
break;
case SHADER_OPCODE_INTERLOCK:
/* The interlock is basically a memory fence issued via sendc */
brw_memory_fence(p, dst, BRW_OPCODE_SENDC);
break; break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: { case SHADER_OPCODE_FIND_LIVE_CHANNEL: {

View File

@@ -4823,6 +4823,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break; break;
} }
case nir_intrinsic_begin_invocation_interlock: {
const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
ubld.emit(SHADER_OPCODE_INTERLOCK, tmp)->size_written = 2 *
REG_SIZE;
break;
}
case nir_intrinsic_end_invocation_interlock: {
/* We don't need to do anything here */
break;
}
default: default:
unreachable("unknown intrinsic"); unreachable("unknown intrinsic");
} }

View File

@@ -296,6 +296,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
return "typed_surface_write_logical"; return "typed_surface_write_logical";
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE:
return "memory_fence"; return "memory_fence";
case SHADER_OPCODE_INTERLOCK:
/* For an interlock we actually issue a memory fence via sendc. */
return "interlock";
case SHADER_OPCODE_BYTE_SCATTERED_READ: case SHADER_OPCODE_BYTE_SCATTERED_READ:
return "byte_scattered_read"; return "byte_scattered_read";
@@ -1003,6 +1006,7 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_INTERLOCK:
case SHADER_OPCODE_URB_WRITE_SIMD8: case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:

View File

@@ -1904,7 +1904,7 @@ generate_code(struct brw_codegen *p,
break; break;
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst); brw_memory_fence(p, dst, BRW_OPCODE_SEND);
break; break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: { case SHADER_OPCODE_FIND_LIVE_CHANNEL: {

View File

@@ -245,6 +245,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.EXT_shader_samples_identical = true; ctx->Extensions.EXT_shader_samples_identical = true;
ctx->Extensions.OES_primitive_bounding_box = true; ctx->Extensions.OES_primitive_bounding_box = true;
ctx->Extensions.OES_texture_buffer = true; ctx->Extensions.OES_texture_buffer = true;
ctx->Extensions.ARB_fragment_shader_interlock = true;
if (can_do_pipelined_register_writes(brw->screen)) { if (can_do_pipelined_register_writes(brw->screen)) {
ctx->Extensions.ARB_draw_indirect = true; ctx->Extensions.ARB_draw_indirect = true;