intel/compiler: Implement untyped atomic float min, max, and compare-swap dataport messages
v2: Split changes to the message type field into another patch. Suggested by Caio.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
@@ -421,6 +421,8 @@ static const char *const dp_dc1_msg_type_hsw[32] = {
|
|||||||
[HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2] =
|
[HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2] =
|
||||||
"DC 4x2 atomic counter op",
|
"DC 4x2 atomic counter op",
|
||||||
[HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write",
|
[HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE] = "DC typed surface write",
|
||||||
|
[GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP] =
|
||||||
|
"DC untyped atomic float op",
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char *const aop[16] = {
|
static const char *const aop[16] = {
|
||||||
@@ -441,6 +443,12 @@ static const char *const aop[16] = {
|
|||||||
[BRW_AOP_PREDEC] = "predec",
|
[BRW_AOP_PREDEC] = "predec",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const char *const aop_float[4] = {
|
||||||
|
[BRW_AOP_FMAX] = "fmax",
|
||||||
|
[BRW_AOP_FMIN] = "fmin",
|
||||||
|
[BRW_AOP_FCMPWR] = "fcmpwr",
|
||||||
|
};
|
||||||
|
|
||||||
static const char * const pixel_interpolator_msg_types[4] = {
|
static const char * const pixel_interpolator_msg_types[4] = {
|
||||||
[GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET] = "per_message_offset",
|
[GEN7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET] = "per_message_offset",
|
||||||
[GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE] = "sample_position",
|
[GEN7_PIXEL_INTERPOLATOR_LOC_SAMPLE] = "sample_position",
|
||||||
@@ -1797,6 +1805,11 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||||||
simd_modes[msg_ctrl >> 4], msg_ctrl & 0xf);
|
simd_modes[msg_ctrl >> 4], msg_ctrl & 0xf);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
||||||
|
format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
|
||||||
|
control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
|
||||||
|
&space);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
format(file, "0x%x", msg_ctrl);
|
format(file, "0x%x", msg_ctrl);
|
||||||
}
|
}
|
||||||
|
@@ -577,6 +577,17 @@ brw_untyped_atomic(struct brw_codegen *p,
|
|||||||
bool response_expected,
|
bool response_expected,
|
||||||
bool header_present);
|
bool header_present);
|
||||||
|
|
||||||
|
/**
 * Emit an Untyped Atomic Float Operation dataport message.
 *
 * \param p                 Code generator state.
 * \param dst               Destination register for the returned data.
 * \param payload           Message payload register.
 * \param surface           Surface the message operates on.
 * \param atomic_op         Atomic operation type, one of BRW_AOP_F*.
 * \param msg_length        Length of the message payload.
 * \param response_expected Whether the message returns data.
 * \param header_present    Whether the payload starts with a header.
 */
void brw_untyped_atomic_float(struct brw_codegen *p,
                              struct brw_reg dst,
                              struct brw_reg payload,
                              struct brw_reg surface,
                              unsigned atomic_op,
                              unsigned msg_length,
                              bool response_expected,
                              bool header_present);
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_untyped_surface_read(struct brw_codegen *p,
|
brw_untyped_surface_read(struct brw_codegen *p,
|
||||||
struct brw_reg dst,
|
struct brw_reg dst,
|
||||||
|
@@ -395,6 +395,8 @@ enum opcode {
|
|||||||
*/
|
*/
|
||||||
SHADER_OPCODE_UNTYPED_ATOMIC,
|
SHADER_OPCODE_UNTYPED_ATOMIC,
|
||||||
SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
||||||
|
SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
|
||||||
|
SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||||
SHADER_OPCODE_UNTYPED_SURFACE_READ,
|
SHADER_OPCODE_UNTYPED_SURFACE_READ,
|
||||||
SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
|
SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
|
||||||
SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
|
SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
|
||||||
@@ -1159,6 +1161,7 @@ enum brw_message_target {
|
|||||||
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11
|
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11
|
||||||
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12
|
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12
|
||||||
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13
|
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13
|
||||||
|
#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b
|
||||||
|
|
||||||
/* GEN9 */
|
/* GEN9 */
|
||||||
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
|
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
|
||||||
@@ -1177,7 +1180,9 @@ enum brw_message_target {
|
|||||||
#define GEN8_BTI_STATELESS_IA_COHERENT 255
|
#define GEN8_BTI_STATELESS_IA_COHERENT 255
|
||||||
#define GEN8_BTI_STATELESS_NON_COHERENT 253
|
#define GEN8_BTI_STATELESS_NON_COHERENT 253
|
||||||
|
|
||||||
/* dataport atomic operations. */
|
/* Dataport atomic operations for Untyped Atomic Integer Operation message
|
||||||
|
* (and others).
|
||||||
|
*/
|
||||||
#define BRW_AOP_AND 1
|
#define BRW_AOP_AND 1
|
||||||
#define BRW_AOP_OR 2
|
#define BRW_AOP_OR 2
|
||||||
#define BRW_AOP_XOR 3
|
#define BRW_AOP_XOR 3
|
||||||
@@ -1194,6 +1199,11 @@ enum brw_message_target {
|
|||||||
#define BRW_AOP_CMPWR 14
|
#define BRW_AOP_CMPWR 14
|
||||||
#define BRW_AOP_PREDEC 15
|
#define BRW_AOP_PREDEC 15
|
||||||
|
|
||||||
|
/* Atomic operation types accepted by the Untyped Atomic Float Operation
 * dataport message.  These are numbered independently of the integer
 * BRW_AOP_* operations defined above.
 */
#define BRW_AOP_FMAX                  1
#define BRW_AOP_FMIN                  2
#define BRW_AOP_FCMPWR                3
|
||||||
|
|
||||||
#define BRW_MATH_FUNCTION_INV 1
|
#define BRW_MATH_FUNCTION_INV 1
|
||||||
#define BRW_MATH_FUNCTION_LOG 2
|
#define BRW_MATH_FUNCTION_LOG 2
|
||||||
#define BRW_MATH_FUNCTION_EXP 3
|
#define BRW_MATH_FUNCTION_EXP 3
|
||||||
|
@@ -2800,6 +2800,53 @@ brw_untyped_atomic(struct brw_codegen *p,
|
|||||||
payload, surface, desc);
|
payload, surface, desc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
brw_dp_untyped_atomic_float_desc(struct brw_codegen *p,
|
||||||
|
unsigned atomic_op,
|
||||||
|
bool response_expected)
|
||||||
|
{
|
||||||
|
const struct gen_device_info *devinfo = p->devinfo;
|
||||||
|
const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;
|
||||||
|
unsigned msg_control =
|
||||||
|
atomic_op | /* Atomic Operation Type: BRW_AOP_F* */
|
||||||
|
(response_expected ? 1 << 5 : 0); /* Return data expected */
|
||||||
|
|
||||||
|
assert(devinfo->gen >= 9);
|
||||||
|
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
|
||||||
|
|
||||||
|
if (brw_get_default_exec_size(p) != BRW_EXECUTE_16)
|
||||||
|
msg_control |= 1 << 4; /* SIMD8 mode */
|
||||||
|
|
||||||
|
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
brw_untyped_atomic_float(struct brw_codegen *p,
|
||||||
|
struct brw_reg dst,
|
||||||
|
struct brw_reg payload,
|
||||||
|
struct brw_reg surface,
|
||||||
|
unsigned atomic_op,
|
||||||
|
unsigned msg_length,
|
||||||
|
bool response_expected,
|
||||||
|
bool header_present)
|
||||||
|
{
|
||||||
|
const struct gen_device_info *devinfo = p->devinfo;
|
||||||
|
|
||||||
|
assert(devinfo->gen >= 9);
|
||||||
|
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
|
||||||
|
|
||||||
|
const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
|
||||||
|
const unsigned response_length = brw_surface_payload_size(
|
||||||
|
p, response_expected, true, true);
|
||||||
|
const unsigned desc =
|
||||||
|
brw_message_desc(devinfo, msg_length, response_length, header_present) |
|
||||||
|
brw_dp_untyped_atomic_float_desc(p, atomic_op, response_expected);
|
||||||
|
|
||||||
|
brw_send_indirect_surface_message(p, sfid,
|
||||||
|
brw_writemask(dst, WRITEMASK_XYZW),
|
||||||
|
payload, surface, desc);
|
||||||
|
}
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
brw_dp_untyped_surface_read_desc(struct brw_codegen *p,
|
brw_dp_untyped_surface_read_desc(struct brw_codegen *p,
|
||||||
unsigned num_channels)
|
unsigned num_channels)
|
||||||
|
@@ -242,6 +242,7 @@ fs_inst::is_send_from_grf() const
|
|||||||
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
||||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
|
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
|
||||||
@@ -808,6 +809,20 @@ fs_inst::components_read(unsigned i) const
|
|||||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||||
return (i == 0 ? 2 : 1);
|
return (i == 0 ? 2 : 1);
|
||||||
|
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: {
|
||||||
|
assert(src[3].file == IMM &&
|
||||||
|
src[4].file == IMM);
|
||||||
|
const unsigned op = src[4].ud;
|
||||||
|
/* Surface coordinates. */
|
||||||
|
if (i == 0)
|
||||||
|
return src[3].ud;
|
||||||
|
/* Surface operation source. */
|
||||||
|
else if (i == 1 && op == BRW_AOP_FCMPWR)
|
||||||
|
return 2;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -835,6 +850,7 @@ fs_inst::size_read(int arg) const
|
|||||||
case SHADER_OPCODE_URB_READ_SIMD8:
|
case SHADER_OPCODE_URB_READ_SIMD8:
|
||||||
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
|
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||||
@@ -4976,6 +4992,12 @@ fs_visitor::lower_logical_sends()
|
|||||||
ibld.sample_mask_reg());
|
ibld.sample_mask_reg());
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
|
lower_surface_logical_send(ibld, inst,
|
||||||
|
SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT,
|
||||||
|
ibld.sample_mask_reg());
|
||||||
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
||||||
lower_surface_logical_send(ibld, inst,
|
lower_surface_logical_send(ibld, inst,
|
||||||
SHADER_OPCODE_TYPED_SURFACE_READ,
|
SHADER_OPCODE_TYPED_SURFACE_READ,
|
||||||
@@ -5479,6 +5501,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||||||
return 8;
|
return 8;
|
||||||
|
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||||
|
@@ -222,8 +222,12 @@ public:
|
|||||||
nir_intrinsic_instr *instr);
|
nir_intrinsic_instr *instr);
|
||||||
void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
|
void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
|
||||||
int op, nir_intrinsic_instr *instr);
|
int op, nir_intrinsic_instr *instr);
|
||||||
|
void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,
|
||||||
|
int op, nir_intrinsic_instr *instr);
|
||||||
void nir_emit_shared_atomic(const brw::fs_builder &bld,
|
void nir_emit_shared_atomic(const brw::fs_builder &bld,
|
||||||
int op, nir_intrinsic_instr *instr);
|
int op, nir_intrinsic_instr *instr);
|
||||||
|
void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
|
||||||
|
int op, nir_intrinsic_instr *instr);
|
||||||
void nir_emit_texture(const brw::fs_builder &bld,
|
void nir_emit_texture(const brw::fs_builder &bld,
|
||||||
nir_tex_instr *instr);
|
nir_tex_instr *instr);
|
||||||
void nir_emit_jump(const brw::fs_builder &bld,
|
void nir_emit_jump(const brw::fs_builder &bld,
|
||||||
|
@@ -679,6 +679,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||||
@@ -720,6 +721,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
|||||||
case SHADER_OPCODE_TG4_LOGICAL:
|
case SHADER_OPCODE_TG4_LOGICAL:
|
||||||
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
|
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||||
|
@@ -55,6 +55,8 @@ can_omit_write(const fs_inst *inst)
|
|||||||
switch (inst->opcode) {
|
switch (inst->opcode) {
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||||
return true;
|
return true;
|
||||||
|
@@ -2196,6 +2196,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
|||||||
inst->header_size);
|
inst->header_size);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
|
||||||
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||||
|
brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud,
|
||||||
|
inst->mlen, !inst->dst.is_null(),
|
||||||
|
inst->header_size);
|
||||||
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||||
assert(!inst->header_size);
|
assert(!inst->header_size);
|
||||||
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||||
|
@@ -3689,6 +3689,15 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
|
|||||||
case nir_intrinsic_shared_atomic_comp_swap:
|
case nir_intrinsic_shared_atomic_comp_swap:
|
||||||
nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr);
|
nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr);
|
||||||
break;
|
break;
|
||||||
|
case nir_intrinsic_shared_atomic_fmin:
|
||||||
|
nir_emit_shared_atomic_float(bld, BRW_AOP_FMIN, instr);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_shared_atomic_fmax:
|
||||||
|
nir_emit_shared_atomic_float(bld, BRW_AOP_FMAX, instr);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_shared_atomic_fcomp_swap:
|
||||||
|
nir_emit_shared_atomic_float(bld, BRW_AOP_FCMPWR, instr);
|
||||||
|
break;
|
||||||
|
|
||||||
case nir_intrinsic_load_shared: {
|
case nir_intrinsic_load_shared: {
|
||||||
assert(devinfo->gen >= 7);
|
assert(devinfo->gen >= 7);
|
||||||
@@ -4398,6 +4407,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
case nir_intrinsic_ssbo_atomic_comp_swap:
|
case nir_intrinsic_ssbo_atomic_comp_swap:
|
||||||
nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
|
nir_emit_ssbo_atomic(bld, BRW_AOP_CMPWR, instr);
|
||||||
break;
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_fmin:
|
||||||
|
nir_emit_ssbo_atomic_float(bld, BRW_AOP_FMIN, instr);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_fmax:
|
||||||
|
nir_emit_ssbo_atomic_float(bld, BRW_AOP_FMAX, instr);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_fcomp_swap:
|
||||||
|
nir_emit_ssbo_atomic_float(bld, BRW_AOP_FCMPWR, instr);
|
||||||
|
break;
|
||||||
|
|
||||||
case nir_intrinsic_get_buffer_size: {
|
case nir_intrinsic_get_buffer_size: {
|
||||||
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
|
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
|
||||||
@@ -4886,6 +4904,54 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
|||||||
bld.MOV(dest, atomic_result);
|
bld.MOV(dest, atomic_result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
|
||||||
|
int op, nir_intrinsic_instr *instr)
|
||||||
|
{
|
||||||
|
if (stage == MESA_SHADER_FRAGMENT)
|
||||||
|
brw_wm_prog_data(prog_data)->has_side_effects = true;
|
||||||
|
|
||||||
|
fs_reg dest;
|
||||||
|
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
||||||
|
dest = get_nir_dest(instr->dest);
|
||||||
|
|
||||||
|
fs_reg surface;
|
||||||
|
nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
|
||||||
|
if (const_surface) {
|
||||||
|
unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
|
||||||
|
const_surface->u32[0];
|
||||||
|
surface = brw_imm_ud(surf_index);
|
||||||
|
brw_mark_surface_used(prog_data, surf_index);
|
||||||
|
} else {
|
||||||
|
surface = vgrf(glsl_type::uint_type);
|
||||||
|
bld.ADD(surface, get_nir_src(instr->src[0]),
|
||||||
|
brw_imm_ud(stage_prog_data->binding_table.ssbo_start));
|
||||||
|
|
||||||
|
/* Assume this may touch any SSBO. This is the same we do for other
|
||||||
|
* UBO/SSBO accesses with non-constant surface.
|
||||||
|
*/
|
||||||
|
brw_mark_surface_used(prog_data,
|
||||||
|
stage_prog_data->binding_table.ssbo_start +
|
||||||
|
nir->info.num_ssbos - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
fs_reg offset = get_nir_src(instr->src[1]);
|
||||||
|
fs_reg data1 = get_nir_src(instr->src[2]);
|
||||||
|
fs_reg data2;
|
||||||
|
if (op == BRW_AOP_FCMPWR)
|
||||||
|
data2 = get_nir_src(instr->src[3]);
|
||||||
|
|
||||||
|
/* Emit the actual atomic operation */
|
||||||
|
|
||||||
|
fs_reg atomic_result = emit_untyped_atomic_float(bld, surface, offset,
|
||||||
|
data1, data2,
|
||||||
|
1 /* dims */, 1 /* rsize */,
|
||||||
|
op,
|
||||||
|
BRW_PREDICATE_NONE);
|
||||||
|
dest.type = atomic_result.type;
|
||||||
|
bld.MOV(dest, atomic_result);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
|
fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
|
||||||
int op, nir_intrinsic_instr *instr)
|
int op, nir_intrinsic_instr *instr)
|
||||||
@@ -4923,6 +4989,43 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
|
|||||||
bld.MOV(dest, atomic_result);
|
bld.MOV(dest, atomic_result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
|
||||||
|
int op, nir_intrinsic_instr *instr)
|
||||||
|
{
|
||||||
|
fs_reg dest;
|
||||||
|
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
||||||
|
dest = get_nir_dest(instr->dest);
|
||||||
|
|
||||||
|
fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
|
||||||
|
fs_reg offset;
|
||||||
|
fs_reg data1 = get_nir_src(instr->src[1]);
|
||||||
|
fs_reg data2;
|
||||||
|
if (op == BRW_AOP_FCMPWR)
|
||||||
|
data2 = get_nir_src(instr->src[2]);
|
||||||
|
|
||||||
|
/* Get the offset */
|
||||||
|
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
|
||||||
|
if (const_offset) {
|
||||||
|
offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
|
||||||
|
} else {
|
||||||
|
offset = vgrf(glsl_type::uint_type);
|
||||||
|
bld.ADD(offset,
|
||||||
|
retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
|
||||||
|
brw_imm_ud(instr->const_index[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Emit the actual atomic operation operation */
|
||||||
|
|
||||||
|
fs_reg atomic_result = emit_untyped_atomic_float(bld, surface, offset,
|
||||||
|
data1, data2,
|
||||||
|
1 /* dims */, 1 /* rsize */,
|
||||||
|
op,
|
||||||
|
BRW_PREDICATE_NONE);
|
||||||
|
dest.type = atomic_result.type;
|
||||||
|
bld.MOV(dest, atomic_result);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
||||||
{
|
{
|
||||||
|
@@ -110,6 +110,30 @@ namespace brw {
|
|||||||
addr, tmp, surface, dims, op, rsize, pred);
|
addr, tmp, surface, dims, op, rsize, pred);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emit an untyped surface atomic float opcode. \p dims determines the
|
||||||
|
* number of components of the address and \p rsize the number of
|
||||||
|
* components of the returned value (either zero or one).
|
||||||
|
*/
|
||||||
|
fs_reg
|
||||||
|
emit_untyped_atomic_float(const fs_builder &bld,
|
||||||
|
const fs_reg &surface, const fs_reg &addr,
|
||||||
|
const fs_reg &src0, const fs_reg &src1,
|
||||||
|
unsigned dims, unsigned rsize, unsigned op,
|
||||||
|
brw_predicate pred)
|
||||||
|
{
|
||||||
|
/* FINISHME: Factor out this frequently recurring pattern into a
|
||||||
|
* helper function.
|
||||||
|
*/
|
||||||
|
const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
|
||||||
|
const fs_reg srcs[] = { src0, src1 };
|
||||||
|
const fs_reg tmp = bld.vgrf(src0.type, n);
|
||||||
|
bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
|
||||||
|
|
||||||
|
return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||||
|
addr, tmp, surface, dims, op, rsize, pred);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Emit a typed surface read opcode. \p dims determines the number of
|
* Emit a typed surface read opcode. \p dims determines the number of
|
||||||
* components of the address and \p size the number of components of the
|
* components of the address and \p size the number of components of the
|
||||||
|
@@ -48,6 +48,13 @@ namespace brw {
|
|||||||
unsigned dims, unsigned rsize, unsigned op,
|
unsigned dims, unsigned rsize, unsigned op,
|
||||||
brw_predicate pred = BRW_PREDICATE_NONE);
|
brw_predicate pred = BRW_PREDICATE_NONE);
|
||||||
|
|
||||||
|
/**
 * Emit an untyped surface atomic float opcode.
 *
 * \p dims is the number of components of the address \p addr, and \p rsize
 * is the number of components of the returned value (either zero or one).
 * \p src0 and \p src1 are the data operands and \p op is the atomic
 * operation type.  \p pred defaults to BRW_PREDICATE_NONE, like the
 * declaration immediately before this one.
 */
fs_reg
emit_untyped_atomic_float(const fs_builder &bld,
                          const fs_reg &surface, const fs_reg &addr,
                          const fs_reg &src0, const fs_reg &src1,
                          unsigned dims, unsigned rsize, unsigned op,
                          brw_predicate pred = BRW_PREDICATE_NONE);
|
||||||
|
|
||||||
fs_reg
|
fs_reg
|
||||||
emit_typed_read(const fs_builder &bld, const fs_reg &surface,
|
emit_typed_read(const fs_builder &bld, const fs_reg &surface,
|
||||||
const fs_reg &addr, unsigned dims, unsigned size);
|
const fs_reg &addr, unsigned dims, unsigned size);
|
||||||
|
@@ -369,6 +369,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||||
/* Test code:
|
/* Test code:
|
||||||
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
|
* mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
|
||||||
|
@@ -274,6 +274,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
|||||||
return "untyped_atomic";
|
return "untyped_atomic";
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||||
return "untyped_atomic_logical";
|
return "untyped_atomic_logical";
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
|
||||||
|
return "untyped_atomic_float";
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
|
return "untyped_atomic_float_logical";
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
|
||||||
return "untyped_surface_read";
|
return "untyped_surface_read";
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
||||||
@@ -996,6 +1000,8 @@ backend_instruction::has_side_effects() const
|
|||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
||||||
|
Reference in New Issue
Block a user