intel/fs: Lower untyped atomic messages to LSC when available
Bspec programming note mentions that "Atomic messages are always forced to "un-cacheable" in the L1 cache". We therefore make the L1 cache un-cacheable and use a write-back policy for L3. v2: (Sagar Ghuge): - Fix caching policy for atomic messages - Fix simd exec size v3: (Sagar Ghuge): - Add atomic messages to brw_schedule_instructions v4: (Jason Ekstrand): - Rebase on lsc_msg_desc reworks Co-authored-by: Sagar Ghuge <sagar.ghuge@intel.com> Co-authored-by: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11600>
This commit is contained in:
@@ -5842,6 +5842,42 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
inst->resize_sources(4);
|
inst->resize_sources(4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static enum lsc_opcode
|
||||||
|
brw_atomic_op_to_lsc_atomic_op(unsigned op)
|
||||||
|
{
|
||||||
|
switch(op) {
|
||||||
|
case BRW_AOP_AND:
|
||||||
|
return LSC_OP_ATOMIC_AND;
|
||||||
|
case BRW_AOP_OR:
|
||||||
|
return LSC_OP_ATOMIC_OR;
|
||||||
|
case BRW_AOP_XOR:
|
||||||
|
return LSC_OP_ATOMIC_XOR;
|
||||||
|
case BRW_AOP_MOV:
|
||||||
|
return LSC_OP_ATOMIC_STORE;
|
||||||
|
case BRW_AOP_INC:
|
||||||
|
return LSC_OP_ATOMIC_INC;
|
||||||
|
case BRW_AOP_DEC:
|
||||||
|
return LSC_OP_ATOMIC_DEC;
|
||||||
|
case BRW_AOP_ADD:
|
||||||
|
return LSC_OP_ATOMIC_ADD;
|
||||||
|
case BRW_AOP_SUB:
|
||||||
|
return LSC_OP_ATOMIC_SUB;
|
||||||
|
case BRW_AOP_IMAX:
|
||||||
|
return LSC_OP_ATOMIC_MAX;
|
||||||
|
case BRW_AOP_IMIN:
|
||||||
|
return LSC_OP_ATOMIC_MIN;
|
||||||
|
case BRW_AOP_UMAX:
|
||||||
|
return LSC_OP_ATOMIC_UMAX;
|
||||||
|
case BRW_AOP_UMIN:
|
||||||
|
return LSC_OP_ATOMIC_UMIN;
|
||||||
|
case BRW_AOP_CMPWR:
|
||||||
|
return LSC_OP_ATOMIC_CMPXCHG;
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
unreachable("invalid atomic opcode");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
{
|
{
|
||||||
@@ -5915,6 +5951,22 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
LSC_CACHE_STORE_L1STATE_L3MOCS,
|
LSC_CACHE_STORE_L1STATE_L3MOCS,
|
||||||
false /* has_dest */);
|
false /* has_dest */);
|
||||||
break;
|
break;
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
/* Bspec: Atomic instruction -> Cache section:
|
||||||
|
*
|
||||||
|
* Atomic messages are always forced to "un-cacheable" in the L1
|
||||||
|
* cache.
|
||||||
|
*/
|
||||||
|
inst->desc = lsc_msg_desc(devinfo,
|
||||||
|
brw_atomic_op_to_lsc_atomic_op(arg.ud),
|
||||||
|
inst->exec_size,
|
||||||
|
surf_type, LSC_ADDR_SIZE_A32,
|
||||||
|
1 /* num_coordinates */,
|
||||||
|
LSC_DATA_SIZE_D32, 1 /* num_channels */,
|
||||||
|
false /* transpose */,
|
||||||
|
LSC_CACHE_STORE_L1UC_L3WB,
|
||||||
|
!inst->dst.is_null());
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
unreachable("Unknown surface logical instruction");
|
unreachable("Unknown surface logical instruction");
|
||||||
}
|
}
|
||||||
@@ -6530,6 +6582,7 @@ fs_visitor::lower_logical_sends()
|
|||||||
|
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
||||||
|
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||||
if (devinfo->has_lsc) {
|
if (devinfo->has_lsc) {
|
||||||
lower_lsc_surface_logical_send(ibld, inst);
|
lower_lsc_surface_logical_send(ibld, inst);
|
||||||
break;
|
break;
|
||||||
@@ -6538,7 +6591,6 @@ fs_visitor::lower_logical_sends()
|
|||||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
|
case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
|
||||||
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
|
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
|
||||||
|
@@ -1103,6 +1103,25 @@ namespace {
|
|||||||
0, 20 /* XXX */,
|
0, 20 /* XXX */,
|
||||||
10 /* XXX */, 100 /* XXX */, 0, 0,
|
10 /* XXX */, 100 /* XXX */, 0, 0,
|
||||||
0, 0);
|
0, 0);
|
||||||
|
|
||||||
|
case LSC_OP_ATOMIC_INC:
|
||||||
|
case LSC_OP_ATOMIC_DEC:
|
||||||
|
case LSC_OP_ATOMIC_LOAD:
|
||||||
|
case LSC_OP_ATOMIC_STORE:
|
||||||
|
case LSC_OP_ATOMIC_ADD:
|
||||||
|
case LSC_OP_ATOMIC_SUB:
|
||||||
|
case LSC_OP_ATOMIC_MIN:
|
||||||
|
case LSC_OP_ATOMIC_MAX:
|
||||||
|
case LSC_OP_ATOMIC_UMIN:
|
||||||
|
case LSC_OP_ATOMIC_UMAX:
|
||||||
|
case LSC_OP_ATOMIC_CMPXCHG:
|
||||||
|
case LSC_OP_ATOMIC_AND:
|
||||||
|
case LSC_OP_ATOMIC_OR:
|
||||||
|
case LSC_OP_ATOMIC_XOR:
|
||||||
|
return calculate_desc(info, unit_dp_dc, 2, 0, 0,
|
||||||
|
30 /* XXX */, 400 /* XXX */,
|
||||||
|
10 /* XXX */, 100 /* XXX */, 0, 0,
|
||||||
|
0, 400 /* XXX */);
|
||||||
default:
|
default:
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
@@ -536,6 +536,22 @@ schedule_node::set_latency_gfx7(bool is_haswell)
|
|||||||
case LSC_OP_STORE_CMASK:
|
case LSC_OP_STORE_CMASK:
|
||||||
latency = 300;
|
latency = 300;
|
||||||
break;
|
break;
|
||||||
|
case LSC_OP_ATOMIC_INC:
|
||||||
|
case LSC_OP_ATOMIC_DEC:
|
||||||
|
case LSC_OP_ATOMIC_LOAD:
|
||||||
|
case LSC_OP_ATOMIC_STORE:
|
||||||
|
case LSC_OP_ATOMIC_ADD:
|
||||||
|
case LSC_OP_ATOMIC_SUB:
|
||||||
|
case LSC_OP_ATOMIC_MIN:
|
||||||
|
case LSC_OP_ATOMIC_MAX:
|
||||||
|
case LSC_OP_ATOMIC_UMIN:
|
||||||
|
case LSC_OP_ATOMIC_UMAX:
|
||||||
|
case LSC_OP_ATOMIC_CMPXCHG:
|
||||||
|
case LSC_OP_ATOMIC_AND:
|
||||||
|
case LSC_OP_ATOMIC_OR:
|
||||||
|
case LSC_OP_ATOMIC_XOR:
|
||||||
|
latency = 1400;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
unreachable("unsupported new data port message instruction");
|
unreachable("unsupported new data port message instruction");
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user