intel/eu: Plumb header-present bit to codegen helpers for HDC messages.

This makes sure that the header-present bit of the message descriptor
is in sync with the IR instruction fields, which gives the optimizer
more control to avoid the overhead of setting up a message header when
it's possible to do so.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Francisco Jerez
2017-12-12 12:05:03 -08:00
parent 6edb332b44
commit e7c9adca57
4 changed files with 50 additions and 29 deletions

View File

@@ -444,7 +444,8 @@ brw_untyped_atomic(struct brw_codegen *p,
struct brw_reg surface, struct brw_reg surface,
unsigned atomic_op, unsigned atomic_op,
unsigned msg_length, unsigned msg_length,
bool response_expected); bool response_expected,
bool header_present);
void void
brw_untyped_surface_read(struct brw_codegen *p, brw_untyped_surface_read(struct brw_codegen *p,
@@ -459,7 +460,8 @@ brw_untyped_surface_write(struct brw_codegen *p,
struct brw_reg payload, struct brw_reg payload,
struct brw_reg surface, struct brw_reg surface,
unsigned msg_length, unsigned msg_length,
unsigned num_channels); unsigned num_channels,
bool header_present);
void void
brw_typed_atomic(struct brw_codegen *p, brw_typed_atomic(struct brw_codegen *p,
@@ -468,7 +470,8 @@ brw_typed_atomic(struct brw_codegen *p,
struct brw_reg surface, struct brw_reg surface,
unsigned atomic_op, unsigned atomic_op,
unsigned msg_length, unsigned msg_length,
bool response_expected); bool response_expected,
bool header_present);
void void
brw_typed_surface_read(struct brw_codegen *p, brw_typed_surface_read(struct brw_codegen *p,
@@ -476,14 +479,16 @@ brw_typed_surface_read(struct brw_codegen *p,
struct brw_reg payload, struct brw_reg payload,
struct brw_reg surface, struct brw_reg surface,
unsigned msg_length, unsigned msg_length,
unsigned num_channels); unsigned num_channels,
bool header_present);
void void
brw_typed_surface_write(struct brw_codegen *p, brw_typed_surface_write(struct brw_codegen *p,
struct brw_reg payload, struct brw_reg payload,
struct brw_reg surface, struct brw_reg surface,
unsigned msg_length, unsigned msg_length,
unsigned num_channels); unsigned num_channels,
bool header_present);
void void
brw_byte_scattered_read(struct brw_codegen *p, brw_byte_scattered_read(struct brw_codegen *p,
@@ -498,7 +503,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
struct brw_reg payload, struct brw_reg payload,
struct brw_reg surface, struct brw_reg surface,
unsigned msg_length, unsigned msg_length,
unsigned bit_size); unsigned bit_size,
bool header_present);
void void
brw_memory_fence(struct brw_codegen *p, brw_memory_fence(struct brw_codegen *p,

View File

@@ -2883,7 +2883,8 @@ brw_untyped_atomic(struct brw_codegen *p,
struct brw_reg surface, struct brw_reg surface,
unsigned atomic_op, unsigned atomic_op,
unsigned msg_length, unsigned msg_length,
bool response_expected) bool response_expected,
bool header_present)
{ {
const struct gen_device_info *devinfo = p->devinfo; const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -2901,7 +2902,7 @@ brw_untyped_atomic(struct brw_codegen *p,
p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
brw_surface_payload_size(p, response_expected, brw_surface_payload_size(p, response_expected,
devinfo->gen >= 8 || devinfo->is_haswell, true), devinfo->gen >= 8 || devinfo->is_haswell, true),
align1); header_present);
brw_set_dp_untyped_atomic_message( brw_set_dp_untyped_atomic_message(
p, insn, atomic_op, response_expected); p, insn, atomic_op, response_expected);
@@ -2984,7 +2985,8 @@ brw_untyped_surface_write(struct brw_codegen *p,
struct brw_reg payload, struct brw_reg payload,
struct brw_reg surface, struct brw_reg surface,
unsigned msg_length, unsigned msg_length,
unsigned num_channels) unsigned num_channels,
bool header_present)
{ {
const struct gen_device_info *devinfo = p->devinfo; const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -2996,7 +2998,7 @@ brw_untyped_surface_write(struct brw_codegen *p,
WRITEMASK_X : WRITEMASK_XYZW; WRITEMASK_X : WRITEMASK_XYZW;
struct brw_inst *insn = brw_send_indirect_surface_message( struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, brw_writemask(brw_null_reg(), mask), p, sfid, brw_writemask(brw_null_reg(), mask),
payload, surface, msg_length, 0, align1); payload, surface, msg_length, 0, header_present);
brw_set_dp_untyped_surface_write_message( brw_set_dp_untyped_surface_write_message(
p, insn, num_channels); p, insn, num_channels);
@@ -3054,7 +3056,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
struct brw_reg payload, struct brw_reg payload,
struct brw_reg surface, struct brw_reg surface,
unsigned msg_length, unsigned msg_length,
unsigned bit_size) unsigned bit_size,
bool header_present)
{ {
const struct gen_device_info *devinfo = p->devinfo; const struct gen_device_info *devinfo = p->devinfo;
assert(devinfo->gen > 7 || devinfo->is_haswell); assert(devinfo->gen > 7 || devinfo->is_haswell);
@@ -3063,7 +3066,7 @@ brw_byte_scattered_write(struct brw_codegen *p,
struct brw_inst *insn = brw_send_indirect_surface_message( struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, brw_writemask(brw_null_reg(), WRITEMASK_XYZW), p, sfid, brw_writemask(brw_null_reg(), WRITEMASK_XYZW),
payload, surface, msg_length, 0, true); payload, surface, msg_length, 0, header_present);
unsigned msg_control = unsigned msg_control =
brw_byte_scattered_data_element_from_bit_size(bit_size) << 2; brw_byte_scattered_data_element_from_bit_size(bit_size) << 2;
@@ -3119,7 +3122,8 @@ brw_typed_atomic(struct brw_codegen *p,
struct brw_reg surface, struct brw_reg surface,
unsigned atomic_op, unsigned atomic_op,
unsigned msg_length, unsigned msg_length,
bool response_expected) { bool response_expected,
bool header_present) {
const struct gen_device_info *devinfo = p->devinfo; const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 : HSW_SFID_DATAPORT_DATA_CACHE_1 :
@@ -3131,7 +3135,7 @@ brw_typed_atomic(struct brw_codegen *p,
p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
brw_surface_payload_size(p, response_expected, brw_surface_payload_size(p, response_expected,
devinfo->gen >= 8 || devinfo->is_haswell, false), devinfo->gen >= 8 || devinfo->is_haswell, false),
true); header_present);
brw_set_dp_typed_atomic_message( brw_set_dp_typed_atomic_message(
p, insn, atomic_op, response_expected); p, insn, atomic_op, response_expected);
@@ -3175,7 +3179,8 @@ brw_typed_surface_read(struct brw_codegen *p,
struct brw_reg payload, struct brw_reg payload,
struct brw_reg surface, struct brw_reg surface,
unsigned msg_length, unsigned msg_length,
unsigned num_channels) unsigned num_channels,
bool header_present)
{ {
const struct gen_device_info *devinfo = p->devinfo; const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -3185,7 +3190,7 @@ brw_typed_surface_read(struct brw_codegen *p,
p, sfid, dst, payload, surface, msg_length, p, sfid, dst, payload, surface, msg_length,
brw_surface_payload_size(p, num_channels, brw_surface_payload_size(p, num_channels,
devinfo->gen >= 8 || devinfo->is_haswell, false), devinfo->gen >= 8 || devinfo->is_haswell, false),
true); header_present);
brw_set_dp_typed_surface_read_message( brw_set_dp_typed_surface_read_message(
p, insn, num_channels); p, insn, num_channels);
@@ -3229,7 +3234,8 @@ brw_typed_surface_write(struct brw_codegen *p,
struct brw_reg payload, struct brw_reg payload,
struct brw_reg surface, struct brw_reg surface,
unsigned msg_length, unsigned msg_length,
unsigned num_channels) unsigned num_channels,
bool header_present)
{ {
const struct gen_device_info *devinfo = p->devinfo; const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -3241,7 +3247,7 @@ brw_typed_surface_write(struct brw_codegen *p,
WRITEMASK_X : WRITEMASK_XYZW); WRITEMASK_X : WRITEMASK_XYZW);
struct brw_inst *insn = brw_send_indirect_surface_message( struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, brw_writemask(brw_null_reg(), mask), p, sfid, brw_writemask(brw_null_reg(), mask),
payload, surface, msg_length, 0, true); payload, surface, msg_length, 0, header_present);
brw_set_dp_typed_surface_write_message( brw_set_dp_typed_surface_write_message(
p, insn, num_channels); p, insn, num_channels);

View File

@@ -2118,10 +2118,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
inst->mlen, !inst->dst.is_null()); inst->mlen, !inst->dst.is_null(),
inst->header_size);
break; break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_READ:
assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1], brw_untyped_surface_read(p, dst, src[0], src[1],
inst->mlen, src[2].ud); inst->mlen, src[2].ud);
@@ -2130,10 +2132,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1], brw_untyped_surface_write(p, src[0], src[1],
inst->mlen, src[2].ud); inst->mlen, src[2].ud,
inst->header_size);
break; break;
case SHADER_OPCODE_BYTE_SCATTERED_READ: case SHADER_OPCODE_BYTE_SCATTERED_READ:
assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_byte_scattered_read(p, dst, src[0], src[1], brw_byte_scattered_read(p, dst, src[0], src[1],
inst->mlen, src[2].ud); inst->mlen, src[2].ud);
@@ -2142,24 +2146,28 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case SHADER_OPCODE_BYTE_SCATTERED_WRITE: case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_byte_scattered_write(p, src[0], src[1], brw_byte_scattered_write(p, src[0], src[1],
inst->mlen, src[2].ud); inst->mlen, src[2].ud,
inst->header_size);
break; break;
case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_atomic(p, dst, src[0], src[1], brw_typed_atomic(p, dst, src[0], src[1],
src[2].ud, inst->mlen, !inst->dst.is_null()); src[2].ud, inst->mlen, !inst->dst.is_null(),
inst->header_size);
break; break;
case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1], brw_typed_surface_read(p, dst, src[0], src[1],
inst->mlen, src[2].ud); inst->mlen, src[2].ud,
inst->header_size);
break; break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud); brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud,
inst->header_size);
break; break;
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE:

View File

@@ -1869,10 +1869,11 @@ generate_code(struct brw_codegen *p,
case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen, brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
!inst->dst.is_null()); !inst->dst.is_null(), inst->header_size);
break; break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_READ:
assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen, brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
src[2].ud); src[2].ud);
@@ -1881,25 +1882,25 @@ generate_code(struct brw_codegen *p,
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1], inst->mlen, brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
src[2].ud); src[2].ud, inst->header_size);
break; break;
case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen, brw_typed_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
!inst->dst.is_null()); !inst->dst.is_null(), inst->header_size);
break; break;
case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_READ:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen, brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen,
src[2].ud); src[2].ud, inst->header_size);
break; break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE); assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_typed_surface_write(p, src[0], src[1], inst->mlen, brw_typed_surface_write(p, src[0], src[1], inst->mlen,
src[2].ud); src[2].ud, inst->header_size);
break; break;
case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_MEMORY_FENCE: