intel/fs: Use SHADER_OPCODE_SEND for surface messages

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
Jason Ekstrand
2018-10-30 12:23:44 -05:00
committed by Jason Ekstrand
parent 7f1cf046cd
commit d2d3e04501
5 changed files with 201 additions and 214 deletions

View File

@@ -790,17 +790,6 @@ brw_untyped_atomic(struct brw_codegen *p,
bool response_expected, bool response_expected,
bool header_present); bool header_present);
void
brw_untyped_atomic_float(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
bool response_expected,
bool header_present);
void void
brw_untyped_surface_read(struct brw_codegen *p, brw_untyped_surface_read(struct brw_codegen *p,
struct brw_reg dst, struct brw_reg dst,
@@ -844,22 +833,6 @@ brw_typed_surface_write(struct brw_codegen *p,
unsigned num_channels, unsigned num_channels,
bool header_present); bool header_present);
void
brw_byte_scattered_read(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
unsigned bit_size);
void
brw_byte_scattered_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
unsigned bit_size,
bool header_present);
void void
brw_memory_fence(struct brw_codegen *p, brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst, struct brw_reg dst,

View File

@@ -2785,35 +2785,6 @@ brw_untyped_atomic(struct brw_codegen *p,
payload, surface, desc); payload, surface, desc);
} }
void
brw_untyped_atomic_float(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
bool response_expected,
bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
assert(devinfo->gen >= 9);
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
const unsigned response_length =
brw_surface_payload_size(p, response_expected, exec_size);
const unsigned desc =
brw_message_desc(devinfo, msg_length, response_length, header_present) |
brw_dp_untyped_atomic_float_desc(devinfo, exec_size, atomic_op,
response_expected);
brw_send_indirect_surface_message(p, sfid,
brw_writemask(dst, WRITEMASK_XYZW),
payload, surface, desc);
}
void void
brw_untyped_surface_read(struct brw_codegen *p, brw_untyped_surface_read(struct brw_codegen *p,
struct brw_reg dst, struct brw_reg dst,
@@ -2864,49 +2835,6 @@ brw_untyped_surface_write(struct brw_codegen *p,
payload, surface, desc); payload, surface, desc);
} }
void
brw_byte_scattered_read(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
unsigned bit_size)
{
const struct gen_device_info *devinfo = p->devinfo;
assert(devinfo->gen > 7 || devinfo->is_haswell);
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
const unsigned response_length = brw_surface_payload_size(p, 1, exec_size);
const unsigned desc =
brw_message_desc(devinfo, msg_length, response_length, false) |
brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, false);
brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE,
dst, payload, surface, desc);
}
void
brw_byte_scattered_write(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
unsigned bit_size,
bool header_present)
{
const struct gen_device_info *devinfo = p->devinfo;
assert(devinfo->gen > 7 || devinfo->is_haswell);
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
const unsigned desc =
brw_message_desc(devinfo, msg_length, 0, header_present) |
brw_dp_byte_scattered_rw_desc(devinfo, exec_size, bit_size, true);
brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE,
brw_writemask(brw_null_reg(),
WRITEMASK_XYZW),
payload, surface, desc);
}
void void
brw_typed_atomic(struct brw_codegen *p, brw_typed_atomic(struct brw_codegen *p,
struct brw_reg dst, struct brw_reg dst,

View File

@@ -4851,8 +4851,7 @@ emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask)
} }
static void static void
lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
const fs_reg &sample_mask)
{ {
const gen_device_info *devinfo = bld.shader->devinfo; const gen_device_info *devinfo = bld.shader->devinfo;
@@ -4862,10 +4861,17 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &surface = inst->src[2]; const fs_reg &surface = inst->src[2];
const UNUSED fs_reg &dims = inst->src[3]; const UNUSED fs_reg &dims = inst->src[3];
const fs_reg &arg = inst->src[4]; const fs_reg &arg = inst->src[4];
assert(arg.file == IMM);
/* Calculate the total number of components of the payload. */ /* Calculate the total number of components of the payload. */
const unsigned addr_sz = inst->components_read(0); const unsigned addr_sz = inst->components_read(0);
const unsigned src_sz = inst->components_read(1); const unsigned src_sz = inst->components_read(1);
const bool is_typed_access =
inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL ||
inst->opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL ||
inst->opcode == SHADER_OPCODE_TYPED_ATOMIC_LOGICAL;
/* From the BDW PRM Volume 7, page 147: /* From the BDW PRM Volume 7, page 147:
* *
* "For the Data Cache Data Port*, the header must be present for the * "For the Data Cache Data Port*, the header must be present for the
@@ -4876,10 +4882,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
* messages prior to Gen9, since we have to provide a header anyway. On * messages prior to Gen9, since we have to provide a header anyway. On
* Gen11+ the header has been removed so we can only use predication. * Gen11+ the header has been removed so we can only use predication.
*/ */
const unsigned header_sz = devinfo->gen < 9 && const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0;
(op == SHADER_OPCODE_TYPED_SURFACE_READ ||
op == SHADER_OPCODE_TYPED_SURFACE_WRITE ||
op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0;
const unsigned sz = header_sz + addr_sz + src_sz; const unsigned sz = header_sz + addr_sz + src_sz;
/* Allocate space for the payload. */ /* Allocate space for the payload. */
@@ -4887,6 +4890,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
unsigned n = 0; unsigned n = 0;
const bool has_side_effects = inst->has_side_effects();
fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() :
fs_reg(brw_imm_d(0xffff));
/* Construct the payload. */ /* Construct the payload. */
if (header_sz) if (header_sz)
components[n++] = emit_surface_header(bld, sample_mask); components[n++] = emit_surface_header(bld, sample_mask);
@@ -4925,14 +4932,125 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
} }
} }
uint32_t sfid;
switch (inst->opcode) {
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
/* Byte scattered opcodes go through the normal data cache */
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
/* Untyped Surface messages go through the data cache but the SFID value
* changed on Haswell.
*/
sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
break;
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
/* Typed surface messages go through the render cache on IVB and the
* data cache on HSW+.
*/
sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN6_SFID_DATAPORT_RENDER_CACHE);
break;
default:
unreachable("Unsupported surface opcode");
}
uint32_t desc;
switch (inst->opcode) {
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
arg.ud, /* num_channels */
false /* write */);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
arg.ud, /* num_channels */
true /* write */);
break;
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
arg.ud, /* bit_size */
false /* write */);
break;
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
arg.ud, /* bit_size */
true /* write */);
break;
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size,
arg.ud, /* atomic_op */
!inst->dst.is_null());
break;
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size,
arg.ud, /* atomic_op */
!inst->dst.is_null());
break;
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group,
arg.ud, /* num_channels */
false /* write */);
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group,
arg.ud, /* num_channels */
true /* write */);
break;
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group,
arg.ud, /* atomic_op */
!inst->dst.is_null());
break;
default:
unreachable("Unknown surface logical instruction");
}
/* Update the original instruction. */ /* Update the original instruction. */
inst->opcode = op; inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
inst->header_size = header_sz; inst->header_size = header_sz;
inst->send_has_side_effects = has_side_effects;
inst->send_is_volatile = !has_side_effects;
/* Set up SFID and descriptors */
inst->sfid = sfid;
inst->desc = desc;
if (surface.file == IMM) {
inst->desc |= surface.ud & 0xff;
inst->src[0] = brw_imm_ud(0);
} else {
const fs_builder ubld = bld.exec_all().group(1, 0);
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.AND(tmp, surface, brw_imm_ud(0xff));
inst->src[0] = component(tmp, 0);
}
inst->src[1] = brw_imm_ud(0); /* ex_desc */
/* Finally, the payload */
inst->src[2] = payload;
inst->src[0] = payload;
inst->src[1] = surface;
inst->src[2] = arg;
inst->resize_sources(3); inst->resize_sources(3);
delete[] components; delete[] components;
@@ -5076,57 +5194,15 @@ fs_visitor::lower_logical_sends()
break; break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
fs_reg());
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
ibld.sample_mask_reg());
break;
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_BYTE_SCATTERED_READ,
fs_reg());
break;
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_BYTE_SCATTERED_WRITE,
ibld.sample_mask_reg());
break;
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_ATOMIC,
ibld.sample_mask_reg());
break;
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
ibld.sample_mask_reg());
break;
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_TYPED_SURFACE_READ,
brw_imm_d(0xffff));
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_TYPED_SURFACE_WRITE,
ibld.sample_mask_reg());
break;
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
lower_surface_logical_send(ibld, inst, lower_surface_logical_send(ibld, inst);
SHADER_OPCODE_TYPED_ATOMIC,
ibld.sample_mask_reg());
break; break;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:

View File

@@ -2264,68 +2264,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_shader_time_add(inst, src[0], src[1], src[2]); generate_shader_time_add(inst, src[0], src[1], src[2]);
break; break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
inst->mlen, !inst->dst.is_null(),
inst->header_size);
break;
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud,
inst->mlen, !inst->dst.is_null(),
inst->header_size);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1],
inst->mlen, src[2].ud);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1],
inst->mlen, src[2].ud,
inst->header_size);
break;
case SHADER_OPCODE_BYTE_SCATTERED_READ:
assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_byte_scattered_read(p, dst, src[0], src[1],
inst->mlen, src[2].ud);
break;
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_byte_scattered_write(p, src[0], src[1],
inst->mlen, src[2].ud,
inst->header_size);
break;
case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_atomic(p, dst, src[0], src[1],
src[2].ud, inst->mlen, !inst->dst.is_null(),
inst->header_size);
break;
case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1],
inst->mlen, src[2].ud,
inst->header_size);
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud,
inst->header_size);
break;
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE:
brw_memory_fence(p, dst, BRW_OPCODE_SEND); brw_memory_fence(p, dst, BRW_OPCODE_SEND);
break; break;

View File

@@ -416,6 +416,78 @@ schedule_node::set_latency_gen7(bool is_haswell)
case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND:
switch (inst->sfid) { switch (inst->sfid) {
case GEN6_SFID_DATAPORT_RENDER_CACHE:
switch ((inst->desc >> 14) & 0x1f) {
case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE:
case GEN7_DATAPORT_RC_TYPED_SURFACE_READ:
/* See also SHADER_OPCODE_TYPED_SURFACE_READ */
assert(!is_haswell);
latency = 600;
break;
case GEN7_DATAPORT_RC_TYPED_ATOMIC_OP:
/* See also SHADER_OPCODE_TYPED_ATOMIC */
assert(!is_haswell);
latency = 14000;
break;
default:
unreachable("Unknown render cache message");
}
break;
case GEN7_SFID_DATAPORT_DATA_CACHE:
switch ((inst->desc >> 14) & 0x1f) {
case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ:
case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE:
/* We have no data for this but assume it's roughly the same as
* untyped surface read/write.
*/
latency = 300;
break;
case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ:
case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE:
/* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
assert(!is_haswell);
latency = 600;
break;
case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
/* See also SHADER_OPCODE_UNTYPED_ATOMIC */
assert(!is_haswell);
latency = 14000;
break;
default:
unreachable("Unknown data cache message");
}
break;
case HSW_SFID_DATAPORT_DATA_CACHE_1:
switch ((inst->desc >> 14) & 0x1f) {
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE:
case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ:
case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE:
/* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */
latency = 300;
break;
case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP:
case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2:
case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP:
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
/* See also SHADER_OPCODE_UNTYPED_ATOMIC */
latency = 14000;
break;
default:
unreachable("Unknown data cache message");
}
break;
default: default:
unreachable("Unknown SFID"); unreachable("Unknown SFID");
} }