intel/compiler: Define new LSC data port encodings

Xe-HPG comes with a massively reworked dataport.  The new thing, called
Load/Store Cache or LSC, has a significantly improved interface.
Instead of bespoke messages for every case, there's basically one or two
messages with different bits to control things like address size, how
much data is read/written, etc.  It's way nicer but also means we get to
rewrite all our dataport encoding/decoding code.  This patch kicks off
the party with all of the new enums.

v2 (Jason Ekstrand, Mark Janes):
 - Rename to LSC

v3 (Jason Ekstrand):
 - Add numbers to all enums

Co-authored-by: Mark Janes <mark.a.janes@intel.com>
Co-authored-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11600>
This commit is contained in:
Sagar Ghuge
2020-07-11 16:37:29 -07:00
committed by Marge Bot
parent 4a85a5e09f
commit cf612e4dc1

View File

@@ -1284,6 +1284,10 @@ enum brw_message_target {
HSW_SFID_DATAPORT_DATA_CACHE_1 = 12,
HSW_SFID_CRE = 13,
GFX12_SFID_TGM = 13, /* Typed Global Memory */
GFX12_SFID_SLM = 14, /* Shared Local Memory */
GFX12_SFID_UGM = 15, /* Untyped Global Memory */
GEN_RT_SFID_BINDLESS_THREAD_DISPATCH = 7,
GEN_RT_SFID_RAY_TRACE_ACCELERATOR = 8,
};
@@ -1703,4 +1707,274 @@ enum PACKED brw_rnd_mode {
#define GEN_RT_BTD_SHADER_TYPE_MISS 2
#define GEN_RT_BTD_SHADER_TYPE_INTERSECTION 3
/* Starting with Xe-HPG, the old dataport was massively reworked dataport.
* The new thing, called Load/Store Cache or LSC, has a significantly improved
* interface. Instead of bespoke messages for every case, there's basically
* one or two messages with different bits to control things like address
* size, how much data is read/written, etc. It's way nicer but also means we
* get to rewrite all our dataport encoding/decoding code. This patch kicks
* off the party with all of the new enums.
*/
enum lsc_opcode {
LSC_OP_LOAD = 0,
LSC_OP_LOAD_CMASK = 2,
LSC_OP_STORE = 4,
LSC_OP_STORE_CMASK = 6,
LSC_OP_ATOMIC_INC = 8,
LSC_OP_ATOMIC_DEC = 9,
LSC_OP_ATOMIC_LOAD = 10,
LSC_OP_ATOMIC_STORE = 11,
LSC_OP_ATOMIC_ADD = 12,
LSC_OP_ATOMIC_SUB = 13,
LSC_OP_ATOMIC_MIN = 14,
LSC_OP_ATOMIC_MAX = 15,
LSC_OP_ATOMIC_UMIN = 16,
LSC_OP_ATOMIC_UMAX = 17,
LSC_OP_ATOMIC_CMPXCHG = 18,
LSC_OP_ATOMIC_FADD = 19,
LSC_OP_ATOMIC_FSUB = 20,
LSC_OP_ATOMIC_FMIN = 21,
LSC_OP_ATOMIC_FMAX = 22,
LSC_OP_ATOMIC_FCMPXCHG = 23,
LSC_OP_ATOMIC_AND = 24,
LSC_OP_ATOMIC_OR = 25,
LSC_OP_ATOMIC_XOR = 26,
LSC_OP_FENCE = 31
};
/*
* Specifies the size of the dataport address payload in registers.
*/
enum PACKED lsc_addr_reg_size {
LSC_ADDR_REG_SIZE_1 = 1,
LSC_ADDR_REG_SIZE_2 = 2,
LSC_ADDR_REG_SIZE_3 = 3,
LSC_ADDR_REG_SIZE_4 = 4,
LSC_ADDR_REG_SIZE_6 = 6,
LSC_ADDR_REG_SIZE_8 = 8,
};
/*
* Specifies the size of the address payload item in a dataport message.
*/
enum PACKED lsc_addr_size {
LSC_ADDR_SIZE_A16 = 1, /* 16-bit address offset */
LSC_ADDR_SIZE_A32 = 2, /* 32-bit address offset */
LSC_ADDR_SIZE_A64 = 3, /* 64-bit address offset */
};
/*
* Specifies the type of the address payload item in a dataport message. The
* address type specifies how the dataport message decodes the Extended
* Descriptor for the surface attributes and address calculation.
*/
enum PACKED lsc_addr_surface_type {
LSC_ADDR_SURFTYPE_FLAT = 0, /* Flat */
LSC_ADDR_SURFTYPE_BSS = 1, /* Bindless surface state */
LSC_ADDR_SURFTYPE_SS = 2, /* Surface state */
LSC_ADDR_SURFTYPE_BTI = 3, /* Binding table index */
};
/*
* Specifies the dataport message override to the default L1 and L3 memory
* cache policies. Dataport L1 cache policies are uncached (UC), cached (C),
* cache streaming (S) and invalidate-after-read (IAR). Dataport L3 cache
* policies are uncached (UC) and cached (C).
*/
enum lsc_cache_load {
/* No override. Use the non-pipelined state or surface state cache settings
* for L1 and L3.
*/
LSC_CACHE_LOAD_L1STATE_L3MOCS = 0,
/* Override to L1 uncached and L3 uncached */
LSC_CACHE_LOAD_L1UC_L3UC = 1,
/* Override to L1 uncached and L3 cached */
LSC_CACHE_LOAD_L1UC_L3C = 2,
/* Override to L1 cached and L3 uncached */
LSC_CACHE_LOAD_L1C_L3UC = 3,
/* Override to cache at both L1 and L3 */
LSC_CACHE_LOAD_L1C_L3C = 4,
/* Override to L1 streaming load and L3 uncached */
LSC_CACHE_LOAD_L1S_L3UC = 5,
/* Override to L1 streaming load and L3 cached */
LSC_CACHE_LOAD_L1S_L3C = 6,
/* For load messages, override to L1 invalidate-after-read, and L3 cached. */
LSC_CACHE_LOAD_L1IAR_L3C = 7,
};
/*
* Specifies the dataport message override to the default L1 and L3 memory
* cache policies. Dataport L1 cache policies are uncached (UC), write-through
* (WT), write-back (WB) and streaming (S). Dataport L3 cache policies are
* uncached (UC) and cached (WB).
*/
enum PACKED lsc_cache_store {
/* No override. Use the non-pipelined or surface state cache settings for L1
* and L3.
*/
LSC_CACHE_STORE_L1STATE_L3MOCS = 0,
/* Override to L1 uncached and L3 uncached */
LSC_CACHE_STORE_L1UC_L3UC = 1,
/* Override to L1 uncached and L3 cached */
LSC_CACHE_STORE_L1UC_L3WB = 2,
/* Override to L1 write-through and L3 uncached */
LSC_CACHE_STORE_L1WT_L3UC = 3,
/* Override to L1 write-through and L3 cached */
LSC_CACHE_STORE_L1WT_L3WB = 4,
/* Override to L1 streaming and L3 uncached */
LSC_CACHE_STORE_L1S_L3UC = 5,
/* Override to L1 streaming and L3 cached */
LSC_CACHE_STORE_L1S_L3WB = 6,
/* Override to L1 write-back, and L3 cached */
LSC_CACHE_STORE_L1WB_L3WB = 7,
};
/*
* Specifies which components of the data payload 4-element vector (X,Y,Z,W) is
* packed into the register payload.
*/
enum PACKED lsc_cmask {
LSC_CMASK_X = 0x1,
LSC_CMASK_Y = 0x2,
LSC_CMASK_XY = 0x3,
LSC_CMASK_Z = 0x4,
LSC_CMASK_XZ = 0x5,
LSC_CMASK_YZ = 0x6,
LSC_CMASK_XYZ = 0x7,
LSC_CMASK_W = 0x8,
LSC_CMASK_XW = 0x9,
LSC_CMASK_YW = 0xa,
LSC_CMASK_XYW = 0xb,
LSC_CMASK_ZW = 0xc,
LSC_CMASK_XZW = 0xd,
LSC_CMASK_YZW = 0xe,
LSC_CMASK_XYZW = 0xf,
};
/*
* Specifies the size of the data payload item in a dataport message.
*/
enum PACKED lsc_data_size {
/* 8-bit scalar data value in memory, packed into a 8-bit data value in
* register.
*/
LSC_DATA_SIZE_D8 = 0,
/* 16-bit scalar data value in memory, packed into a 16-bit data value in
* register.
*/
LSC_DATA_SIZE_D16 = 1,
/* 32-bit scalar data value in memory, packed into 32-bit data value in
* register.
*/
LSC_DATA_SIZE_D32 = 2,
/* 64-bit scalar data value in memory, packed into 64-bit data value in
* register.
*/
LSC_DATA_SIZE_D64 = 3,
/* 8-bit scalar data value in memory, packed into 32-bit unsigned data value
* in register.
*/
LSC_DATA_SIZE_D8U32 = 4,
/* 16-bit scalar data value in memory, packed into 32-bit unsigned data
* value in register.
*/
LSC_DATA_SIZE_D16U32 = 5,
/* 16-bit scalar BigFloat data value in memory, packed into 32-bit float
* value in register.
*/
LSC_DATA_SIZE_D16BF32 = 6,
};
/*
* Enum specifies the scope of the fence.
*/
enum PACKED lsc_fence_scope {
/* Wait until all previous memory transactions from this thread are observed
* within the local thread-group.
*/
LSC_FENCE_THREADGROUP = 0,
/* Wait until all previous memory transactions from this thread are observed
* within the local sub-slice.
*/
LSC_FENCE_LOCAL = 1,
/* Wait until all previous memory transactions from this thread are observed
* in the local tile.
*/
LSC_FENCE_TILE = 2,
/* Wait until all previous memory transactions from this thread are observed
* in the local GPU.
*/
LSC_FENCE_GPU = 3,
/* Wait until all previous memory transactions from this thread are observed
* across all GPUs in the system.
*/
LSC_FENCE_ALL_GPU = 4,
/* Wait until all previous memory transactions from this thread are observed
* at the "system" level.
*/
LSC_FENCE_SYSTEM_RELEASE = 5,
/* For GPUs that do not follow PCIe Write ordering for downstream writes
* targeting device memory, a fence message with scope=System_Acquire will
* commit to device memory all downstream and peer writes that have reached
* the device.
*/
LSC_FENCE_SYSTEM_ACQUIRE = 6,
};
/*
* Specifies the type of cache flush operation to perform after a fence is
* complete.
*/
enum PACKED lsc_flush_type {
LSC_FLUSH_TYPE_NONE = 0,
/*
* For a R/W cache, evict dirty lines (M to I state) and invalidate clean
* lines. For a RO cache, invalidate clean lines.
*/
LSC_FLUSH_TYPE_EVICT = 1,
/*
* For both R/W and RO cache, invalidate clean lines in the cache.
*/
LSC_FLUSH_TYPE_INVALIDATE = 2,
/*
* For a R/W cache, invalidate dirty lines (M to I state), without
* write-back to next level. This opcode does nothing for a RO cache.
*/
LSC_FLUSH_TYPE_DISCARD = 3,
/*
* For a R/W cache, write-back dirty lines to the next level, but kept in
* the cache as "clean" (M to V state). This opcode does nothing for a RO
* cache.
*/
LSC_FLUSH_TYPE_CLEAN = 4,
/*
* Flush "RW" section of the L3 cache, but leave L1 and L2 caches untouched.
*/
LSC_FLUSH_TYPE_L3ONLY = 5,
};
enum PACKED lsc_backup_fence_routing {
/* Normal routing: UGM fence is routed to UGM pipeline. */
LSC_NORMAL_ROUTING,
/* Route UGM fence to LSC unit. */
LSC_ROUTE_TO_LSC,
};
/*
* Specifies the size of the vector in a dataport message.
*/
enum PACKED lsc_vect_size {
LSC_VECT_SIZE_V1 = 0, /* vector length 1 */
LSC_VECT_SIZE_V2 = 1, /* vector length 2 */
LSC_VECT_SIZE_V3 = 2, /* Vector length 3 */
LSC_VECT_SIZE_V4 = 3, /* Vector length 4 */
LSC_VECT_SIZE_V8 = 4, /* Vector length 8 */
LSC_VECT_SIZE_V16 = 5, /* Vector length 16 */
LSC_VECT_SIZE_V32 = 6, /* Vector length 32 */
LSC_VECT_SIZE_V64 = 7, /* Vector length 64 */
};
#define LSC_ONE_ADDR_REG 1
#endif /* BRW_EU_DEFINES_H */