intel/fs: enable extended bindless surface offset
Gives us 4GB of bindless surface state on Gfx12.5+ instead of 64MB. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:

committed by
Marge Bot

parent
01fc9a06bd
commit
6d6877bf99
@@ -114,6 +114,13 @@ struct brw_compiler {
|
||||
*/
|
||||
bool indirect_ubos_use_sampler;
|
||||
|
||||
/**
|
||||
* Gfx12.5+ has a bit in the SEND instruction extending the bindless
|
||||
* surface offset range from 20 to 26 bits, effectively giving us 4GB of
|
||||
* bindless surface descriptors instead of 64MB previously.
|
||||
*/
|
||||
bool extended_bindless_surface_offset;
|
||||
|
||||
struct nir_shader *clc_shader;
|
||||
};
|
||||
|
||||
|
@@ -1880,18 +1880,18 @@ lsc_disassemble_ex_desc(const struct intel_device_info *devinfo,
|
||||
const unsigned addr_type = lsc_msg_desc_addr_type(devinfo, imm_desc);
|
||||
switch (addr_type) {
|
||||
case LSC_ADDR_SURFTYPE_FLAT:
|
||||
format(file, "base_offset %u ",
|
||||
format(file, " base_offset %u ",
|
||||
lsc_flat_ex_desc_base_offset(devinfo, imm_ex_desc));
|
||||
break;
|
||||
case LSC_ADDR_SURFTYPE_BSS:
|
||||
case LSC_ADDR_SURFTYPE_SS:
|
||||
format(file, "surface_state_index %u ",
|
||||
format(file, " surface_state_index %u ",
|
||||
lsc_bss_ex_desc_index(devinfo, imm_ex_desc));
|
||||
break;
|
||||
case LSC_ADDR_SURFTYPE_BTI:
|
||||
format(file, "BTI %u ",
|
||||
format(file, " BTI %u ",
|
||||
lsc_bti_ex_desc_index(devinfo, imm_ex_desc));
|
||||
format(file, "base_offset %u ",
|
||||
format(file, " base_offset %u ",
|
||||
lsc_bti_ex_desc_base_offset(devinfo, imm_ex_desc));
|
||||
break;
|
||||
default:
|
||||
@@ -2469,11 +2469,13 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
|
||||
if (space)
|
||||
string(file, " ");
|
||||
}
|
||||
if (devinfo->verx10 >= 125 && brw_inst_send_ex_bso(devinfo, inst))
|
||||
format(file, " ex_bso");
|
||||
if (brw_sfid_is_lsc(sfid)) {
|
||||
lsc_disassemble_ex_desc(devinfo, imm_desc, imm_ex_desc, file);
|
||||
} else {
|
||||
if (has_imm_desc)
|
||||
format(file, "mlen %u", brw_message_desc_mlen(devinfo, imm_desc));
|
||||
format(file, " mlen %u", brw_message_desc_mlen(devinfo, imm_desc));
|
||||
if (has_imm_ex_desc) {
|
||||
format(file, " ex_mlen %u",
|
||||
brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc));
|
||||
|
@@ -1721,6 +1721,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
||||
struct brw_reg ex_desc,
|
||||
unsigned ex_desc_imm,
|
||||
bool ex_desc_scratch,
|
||||
bool ex_bso,
|
||||
bool eot);
|
||||
|
||||
void brw_ff_sync(struct brw_codegen *p,
|
||||
|
@@ -2739,6 +2739,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
||||
struct brw_reg ex_desc,
|
||||
unsigned ex_desc_imm,
|
||||
bool ex_desc_scratch,
|
||||
bool ex_bso,
|
||||
bool eot)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
@@ -2777,6 +2778,12 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
||||
!ex_desc_scratch &&
|
||||
(devinfo->ver >= 12 ||
|
||||
((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
|
||||
/* ATS-M PRMs, Volume 2d: Command Reference: Structures,
|
||||
* EU_INSTRUCTION_SEND instruction
|
||||
*
|
||||
* "ExBSO: Exists If: ([ExDesc.IsReg]==true)"
|
||||
*/
|
||||
assert(!ex_bso);
|
||||
ex_desc.ud |= ex_desc_imm;
|
||||
} else {
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
@@ -2799,7 +2806,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
||||
* descriptor which comes from the address register. If we don't OR
|
||||
* those two bits in, the external unit may get confused and hang.
|
||||
*/
|
||||
unsigned imm_part = ex_desc_imm | sfid | eot << 5;
|
||||
unsigned imm_part = ex_bso ? 0 : (ex_desc_imm | sfid | eot << 5);
|
||||
|
||||
if (ex_desc_scratch) {
|
||||
/* Or the scratch surface offset together with the immediate part of
|
||||
@@ -2852,6 +2859,10 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
||||
brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
|
||||
}
|
||||
|
||||
if (ex_bso) {
|
||||
brw_inst_set_send_ex_bso(devinfo, send, true);
|
||||
brw_inst_set_send_src1_len(devinfo, send, GET_BITS(ex_desc_imm, 10, 6));
|
||||
}
|
||||
brw_inst_set_sfid(devinfo, send, sfid);
|
||||
brw_inst_set_eot(devinfo, send, eot);
|
||||
}
|
||||
|
@@ -343,7 +343,8 @@ fs_generator::generate_send(fs_inst *inst,
|
||||
*/
|
||||
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
|
||||
desc, desc_imm, ex_desc, ex_desc_imm,
|
||||
inst->send_ex_desc_scratch, inst->eot);
|
||||
inst->send_ex_desc_scratch,
|
||||
inst->send_ex_bso, inst->eot);
|
||||
if (inst->check_tdr)
|
||||
brw_inst_set_opcode(p->isa, brw_last_inst,
|
||||
devinfo->ver >= 12 ? BRW_OPCODE_SENDC : BRW_OPCODE_SENDSC);
|
||||
|
@@ -620,8 +620,10 @@ FC(send_sel_reg32_desc, /* 4+ */ 77, 77, /* 12+ */ 48, 48, devinfo->ver
|
||||
FC(send_sel_reg32_ex_desc, /* 4+ */ 61, 61, /* 12+ */ 49, 49, devinfo->ver >= 9)
|
||||
F8(send_src0_reg_file, /* 4+ */ 38, 37, /* 8+ */ 42, 41, /* 12+ */ 66, 66)
|
||||
FC(send_src1_reg_nr, /* 4+ */ 51, 44, /* 12+ */ 111, 104, devinfo->ver >= 9)
|
||||
FC(send_src1_len, /* 4+ */ -1, -1, /* 12+ */ 103, 99, devinfo->verx10 >= 125)
|
||||
FC(send_src1_reg_file, /* 4+ */ 36, 36, /* 12+ */ 98, 98, devinfo->ver >= 9)
|
||||
FC(send_dst_reg_file, /* 4+ */ 35, 35, /* 12+ */ 50, 50, devinfo->ver >= 9)
|
||||
FC(send_ex_bso, /* 4+ */ -1, -1, /* 12+ */ 39, 39, devinfo->verx10 >= 125)
|
||||
/** @} */
|
||||
|
||||
/* Message descriptor bits */
|
||||
|
@@ -177,6 +177,9 @@ struct backend_instruction {
|
||||
* the scratch surface offset to build
|
||||
* extended descriptor
|
||||
*/
|
||||
bool send_ex_bso:1; /**< Only for SHADER_OPCODE_SEND, use extended bindless
|
||||
* surface offset (26bits instead of 20bits)
|
||||
*/
|
||||
bool predicate_trivial:1; /**< The predication mask applied to this
|
||||
* instruction is guaranteed to be uniform and
|
||||
* a superset of the execution mask of the
|
||||
|
@@ -808,6 +808,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
unsigned coord_components,
|
||||
unsigned grad_components)
|
||||
{
|
||||
const brw_compiler *compiler = bld.shader->compiler;
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
const enum brw_reg_type payload_type =
|
||||
brw_reg_type_from_bit_size(payload_type_bit_size, BRW_REGISTER_TYPE_F);
|
||||
@@ -1153,6 +1154,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
* we can use the surface handle directly as the extended descriptor.
|
||||
*/
|
||||
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
||||
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||
} else {
|
||||
/* Immediate portion of the descriptor */
|
||||
inst->desc = brw_sampler_desc(devinfo,
|
||||
@@ -1344,6 +1346,7 @@ setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
||||
const fs_reg &surface, const fs_reg &surface_handle)
|
||||
{
|
||||
const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
|
||||
const brw_compiler *compiler = bld.shader->compiler;
|
||||
|
||||
/* We must have exactly one of surface and surface_handle */
|
||||
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
||||
@@ -1362,6 +1365,7 @@ setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
||||
* we can use the surface handle directly as the extended descriptor.
|
||||
*/
|
||||
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
||||
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||
} else {
|
||||
inst->desc = desc;
|
||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
||||
@@ -1377,12 +1381,15 @@ setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
|
||||
uint32_t desc, const fs_reg &surface)
|
||||
{
|
||||
const ASSERTED intel_device_info *devinfo = bld.shader->devinfo;
|
||||
const brw_compiler *compiler = bld.shader->compiler;
|
||||
|
||||
inst->src[0] = brw_imm_ud(0); /* desc */
|
||||
|
||||
enum lsc_addr_surface_type surf_type = lsc_msg_desc_addr_type(devinfo, desc);
|
||||
switch (surf_type) {
|
||||
case LSC_ADDR_SURFTYPE_BSS:
|
||||
inst->send_ex_bso = compiler->extended_bindless_surface_offset;
|
||||
/* fall-through */
|
||||
case LSC_ADDR_SURFTYPE_SS:
|
||||
assert(surface.file != BAD_FILE);
|
||||
/* We assume that the driver provided the handle in the top 20 bits so
|
||||
@@ -1415,6 +1422,7 @@ setup_lsc_surface_descriptors(const fs_builder &bld, fs_inst *inst,
|
||||
static void
|
||||
lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
const brw_compiler *compiler = bld.shader->compiler;
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
|
||||
/* Get the logical send arguments. */
|
||||
@@ -1646,6 +1654,8 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
inst->header_size = header_sz;
|
||||
inst->send_has_side_effects = has_side_effects;
|
||||
inst->send_is_volatile = !has_side_effects;
|
||||
inst->send_ex_bso = surface_handle.file != BAD_FILE &&
|
||||
compiler->extended_bindless_surface_offset;
|
||||
|
||||
/* Set up SFID and descriptors */
|
||||
inst->sfid = sfid;
|
||||
@@ -1674,6 +1684,7 @@ lsc_bits_to_data_size(unsigned bit_size)
|
||||
static void
|
||||
lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
const brw_compiler *compiler = bld.shader->compiler;
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
assert(devinfo->has_lsc);
|
||||
|
||||
@@ -1808,6 +1819,8 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
inst->header_size = 0;
|
||||
inst->send_has_side_effects = has_side_effects;
|
||||
inst->send_is_volatile = !has_side_effects;
|
||||
inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS &&
|
||||
compiler->extended_bindless_surface_offset;
|
||||
|
||||
inst->resize_sources(4);
|
||||
|
||||
@@ -1828,6 +1841,7 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
static void
|
||||
lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
const brw_compiler *compiler = bld.shader->compiler;
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
assert(devinfo->has_lsc);
|
||||
|
||||
@@ -1893,6 +1907,8 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
inst->header_size = 0;
|
||||
inst->send_has_side_effects = has_side_effects;
|
||||
inst->send_is_volatile = !has_side_effects;
|
||||
inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS &&
|
||||
compiler->extended_bindless_surface_offset;
|
||||
|
||||
inst->resize_sources(4);
|
||||
|
||||
@@ -2322,6 +2338,8 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->sfid = GFX12_SFID_UGM;
|
||||
inst->resize_sources(3);
|
||||
inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS &&
|
||||
compiler->extended_bindless_surface_offset;
|
||||
|
||||
assert(!compiler->indirect_ubos_use_sampler);
|
||||
|
||||
@@ -3019,6 +3037,8 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
||||
/* Update the original instruction. */
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
||||
inst->send_ex_bso = surface_handle.file != BAD_FILE &&
|
||||
compiler->extended_bindless_surface_offset;
|
||||
inst->ex_mlen = 0;
|
||||
inst->header_size = 0;
|
||||
inst->send_has_side_effects = false;
|
||||
|
Reference in New Issue
Block a user