mi-builder: add read/write memory fencing support on Gfx20+

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29571>
This commit is contained in:
Lionel Landwerlin
2024-06-06 11:39:57 +03:00
committed by Marge Bot
parent 3b88a77b45
commit 86813c60a4
6 changed files with 93 additions and 1 deletions

View File

@@ -93,6 +93,9 @@ struct iris_batch {
/** Last binder address set in this hardware context. */
uint64_t last_binder_address;
/** Write fencing status for mi_builder. */
bool write_fence_status;
union {
struct {
uint32_t ctx_id;

View File

@@ -31,6 +31,7 @@
#define __gen_address_type struct iris_address
#define __gen_user_data struct iris_batch
#define __gen_combine_address iris_combine_address
/* mi_builder hook: yields a pointer to the batch's write_fence_status flag. */
#define __gen_get_write_fencing_status(b) (&(b)->write_fence_status)
static inline void *
__gen_get_batch_dwords(struct iris_batch *batch, unsigned dwords)

View File

@@ -43,6 +43,10 @@
#define MI_BUILDER_DEFAULT_WRITE_CHECK true
#endif
/* Controls whether the builder tracks MI memory writes and fences them
 * before later MI memory reads. Defaults to on for Gfx20+; a user of the
 * builder may pre-define it to override the default.
 *
 * The expansion is parenthesized so the macro behaves as a single operand
 * when negated or combined with other operators.
 */
#ifndef MI_BUILDER_RAW_MEM_FENCING
#define MI_BUILDER_RAW_MEM_FENCING (GFX_VER >= 20)
#endif
/** These must be defined by the user of the builder
*
* void *__gen_get_batch_dwords(__gen_user_data *user_data,
@@ -65,6 +69,16 @@
* a fully valid 64-bit address.
*/
/**
* On Gfx20+ this must also be defined by the user of the builder
*
* bool *
* __gen_get_write_fencing_status(__gen_user_data *user_data);
*
* Returns a pointer to a boolean tracking the status of fencing for MI
* commands writing to memory.
*/
/*
* Start of the actual MI builder
*/
@@ -86,7 +100,6 @@
/* Emits one `cmd` packet: asks __gen_get_batch_dwords() for
 * __genxml_cmd_length(cmd) dwords on the builder's user_data and passes that
 * destination to mi_builder_pack(), with `name` forwarded as its last argument.
 */
#define mi_builder_emit(b, cmd, name) \
mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
enum mi_value_type {
MI_VALUE_TYPE_IMM,
MI_VALUE_TYPE_MEM32,
@@ -140,6 +153,8 @@ struct mi_builder {
const struct intel_device_info *devinfo;
__gen_user_data *user_data;
bool no_read_write_fencing;
#if GFX_VERx10 >= 75
uint32_t gprs;
uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];
@@ -169,6 +184,7 @@ mi_builder_init(struct mi_builder *b,
#if GFX_VER >= 12
b->write_check = MI_BUILDER_DEFAULT_WRITE_CHECK;
#endif
b->no_read_write_fencing = false;
#if GFX_VERx10 >= 75
b->gprs = 0;
b->num_math_dwords = 0;
@@ -360,6 +376,31 @@ mi_value_unref(struct mi_builder *b, struct mi_value val)
#endif /* GFX_VERx10 >= 75 */
}
/* On Gfx20+ MI memory reads and writes may be processed out of order, so
 * writes to memory are tracked to guarantee that a later memory read
 * observes the effect of an earlier write.
 *
 * This helper records that a write was emitted by raising the user's
 * write-fencing status flag. It does nothing when
 * MI_BUILDER_RAW_MEM_FENCING is false.
 */
static inline void
mi_builder_set_write(struct mi_builder *b)
{
#if MI_BUILDER_RAW_MEM_FENCING
   bool *write_pending = __gen_get_write_fencing_status(b->user_data);

   *write_pending = true;
#endif
}
/* Emits an MI_MEM_FENCE of type FENCE_TYPE_MI_WRITE when a write has been
 * flagged through the write-fencing status and fencing is not currently
 * suppressed via no_read_write_fencing, then clears the flag.
 *
 * Compiles to nothing when MI_BUILDER_RAW_MEM_FENCING is false.
 */
static inline void
mi_ensure_write_fence(struct mi_builder *b)
{
#if MI_BUILDER_RAW_MEM_FENCING
   /* The caller asked for fencing to be skipped. */
   if (b->no_read_write_fencing)
      return;

   /* No write has been flagged since the last fence. */
   if (!*__gen_get_write_fencing_status(b->user_data))
      return;

   mi_builder_emit(b, GENX(MI_MEM_FENCE), fence) {
      fence.FenceType = FENCE_TYPE_MI_WRITE;
   }

   *__gen_get_write_fencing_status(b->user_data) = false;
#endif
}
static inline struct mi_value
mi_imm(uint64_t imm)
{
@@ -460,6 +501,10 @@ _mi_copy_no_unref(struct mi_builder *b,
#endif
mi_builder_flush_math(b);
if (src.type == MI_VALUE_TYPE_MEM64 ||
src.type == MI_VALUE_TYPE_MEM32)
mi_ensure_write_fence(b);
switch (dst.type) {
case MI_VALUE_TYPE_IMM:
unreachable("Cannot copy to an immediate");
@@ -633,6 +678,16 @@ _mi_copy_no_unref(struct mi_builder *b,
default:
unreachable("Invalid mi_value type");
}
if (dst.type == MI_VALUE_TYPE_MEM64 ||
dst.type == MI_VALUE_TYPE_MEM32) {
/* An immediate store emitted with the write check enabled already waits
* for writes, so it does not need to be flagged for additional fencing
* later. Every other store to memory is flagged.
*/
if (src.type != MI_VALUE_TYPE_IMM || !mi_builder_write_checked(b))
mi_builder_set_write(b);
}
}
#if GFX_VERx10 >= 75
@@ -685,6 +740,10 @@ mi_memcpy(struct mi_builder *b, __gen_address_type dst,
assert(b->num_math_dwords == 0);
#endif
/* Fence once up front, then suppress fencing for the rest of the copy. */
mi_ensure_write_fence(b);
b->no_read_write_fencing = true;
/* Hold off write checks until the last write. */
bool write_check = mi_builder_write_checked(b);
mi_builder_set_write_check(b, false);
@@ -709,6 +768,8 @@ mi_memcpy(struct mi_builder *b, __gen_address_type dst,
mi_store(b, dst_val, tmp_reg);
#endif
}
b->no_read_write_fencing = false;
}
/*
@@ -776,6 +837,8 @@ mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
}
}
mi_builder_set_write(b);
mi_value_unref(b, src);
mi_value_unref(b, dst);
}
@@ -1260,6 +1323,7 @@ mi_store_relocated_imm(struct mi_builder *b, struct mi_value dst)
sdm.Address = dst.addr;
}
token.ptr[0] = dw + GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 32;
mi_builder_set_write(b);
break;
case MI_VALUE_TYPE_MEM64:
@@ -1272,6 +1336,7 @@ mi_store_relocated_imm(struct mi_builder *b, struct mi_value dst)
}
token.ptr[0] = &dw[GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 32];
token.ptr[1] = &dw[GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 32 + 1];
mi_builder_set_write(b);
break;
case MI_VALUE_TYPE_REG32:
@@ -1359,6 +1424,7 @@ mi_store_relocated_address_reg64(struct mi_builder *b, struct mi_value addr_reg)
}
}
mi_builder_set_write(b);
mi_value_unref(b, addr_reg);
return token;
}
@@ -1406,6 +1472,8 @@ MUST_CHECK static inline struct mi_value
mi_load_mem64_offset(struct mi_builder *b,
__gen_address_type addr, struct mi_value offset)
{
mi_ensure_write_fence(b);
uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
struct mi_value addr_val = mi_imm(addr_u64);
@@ -1451,6 +1519,9 @@ mi_store_mem64_offset(struct mi_builder *b,
* registers to flush math afterwards so we don't confuse anyone.
*/
mi_builder_flush_math(b);
/* mi_builder_set_write() is not required here because we have a FENCE_WR
* in the ALU instruction.
*/
}
/*

View File

@@ -52,6 +52,7 @@ uint64_t __gen_combine_address(mi_builder_test *test, void *location,
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
void *location);
bool *__gen_get_write_fencing_status(mi_builder_test *test);
struct address
__gen_address_offset(address addr, uint64_t offset)
@@ -157,6 +158,8 @@ public:
char *output;
uint64_t canary;
bool write_fence_status;
mi_builder b;
};
@@ -314,6 +317,8 @@ mi_builder_test::SetUp()
memset(data_map, 139, DATA_BO_SIZE);
memset(&canary, 139, sizeof(canary));
write_fence_status = false;
struct isl_device isl_dev;
isl_device_init(&isl_dev, &devinfo);
mi_builder_init(&b, &devinfo, this);
@@ -407,6 +412,12 @@ __gen_combine_address(mi_builder_test *test, void *location,
#endif
}
/* mi_builder hook for the test fixture: hands back the address of the
 * fixture's write_fence_status member.
 */
bool *
__gen_get_write_fencing_status(mi_builder_test *test)
{
   bool *status = &test->write_fence_status;

   return status;
}
void *
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{

View File

@@ -2288,6 +2288,11 @@ struct anv_batch {
enum intel_engine_class engine_class;
/**
* Write fencing status for mi_builder.
*/
bool write_fence_status;
/**
* Number of 3DPRIMITIVE's emitted for WA 16014538804
*/

View File

@@ -19,4 +19,5 @@
#define __gen_get_batch_dwords anv_batch_emit_dwords
#define __gen_address_offset anv_address_add
#define __gen_get_batch_address(b, a) anv_batch_address(b, a)
/* mi_builder hook: yields a pointer to the batch's write_fence_status flag. */
#define __gen_get_write_fencing_status(b) (&(b)->write_fence_status)
#include "common/mi_builder.h"